/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
2217683Spst */ 2317683Spst#ifndef lint 24127664Sbmsstatic const char rcsid[] _U_ = 25190225Srpaulo "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.90.2.1 2008/01/02 04:22:16 guy Exp $ (LBL)"; 2617683Spst#endif 2717683Spst 2875107Sfenner#ifdef HAVE_CONFIG_H 2975107Sfenner#include "config.h" 3075107Sfenner#endif 3175107Sfenner 3217683Spst#include <stdio.h> 3317683Spst#include <stdlib.h> 3417683Spst#include <memory.h> 35146768Ssam#include <string.h> 3617683Spst 3775107Sfenner#include <errno.h> 3875107Sfenner 3917683Spst#include "pcap-int.h" 4017683Spst 4117683Spst#include "gencode.h" 4217683Spst 4317683Spst#ifdef HAVE_OS_PROTO_H 4417683Spst#include "os-proto.h" 4517683Spst#endif 4617683Spst 4717683Spst#ifdef BDEBUG 4817683Spstextern int dflag; 4917683Spst#endif 5017683Spst 51146768Ssam#if defined(MSDOS) && !defined(__DJGPP__) 52146768Ssamextern int _w32_ffs (int mask); 53146768Ssam#define ffs _w32_ffs 54146768Ssam#endif 5517683Spst 56190225Srpaulo#if defined(WIN32) && defined (_MSC_VER) 57190225Srpauloint ffs(int mask); 58190225Srpaulo#endif 59190225Srpaulo 60146768Ssam/* 61146768Ssam * Represents a deleted instruction. 62146768Ssam */ 6317683Spst#define NOP -1 6417683Spst 6517683Spst/* 66146768Ssam * Register numbers for use-def values. 67146768Ssam * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 68146768Ssam * location. A_ATOM is the accumulator and X_ATOM is the index 69146768Ssam * register. 70146768Ssam */ 71146768Ssam#define A_ATOM BPF_MEMWORDS 72146768Ssam#define X_ATOM (BPF_MEMWORDS+1) 73146768Ssam 74146768Ssam/* 7517683Spst * This define is used to represent *both* the accumulator and 7617683Spst * x register in use-def computations. 7717683Spst * Currently, the use-def code assumes only one definition per instruction. 7817683Spst */ 7917683Spst#define AX_ATOM N_ATOMS 8017683Spst 8117683Spst/* 8217683Spst * A flag to indicate that further optimization is needed. 
8317683Spst * Iterative passes are continued until a given pass yields no 8417683Spst * branch movement. 8517683Spst */ 8617683Spststatic int done; 8717683Spst 8817683Spst/* 8917683Spst * A block is marked if only if its mark equals the current mark. 9017683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is 9117683Spst * incremented. This automatically makes each element unmarked. 9217683Spst */ 9317683Spststatic int cur_mark; 9417683Spst#define isMarked(p) ((p)->mark == cur_mark) 9517683Spst#define unMarkAll() cur_mark += 1 9617683Spst#define Mark(p) ((p)->mark = cur_mark) 9717683Spst 9817683Spststatic void opt_init(struct block *); 9917683Spststatic void opt_cleanup(void); 10017683Spst 10117683Spststatic void make_marks(struct block *); 10217683Spststatic void mark_code(struct block *); 10317683Spst 10417683Spststatic void intern_blocks(struct block *); 10517683Spst 10617683Spststatic int eq_slist(struct slist *, struct slist *); 10717683Spst 10817683Spststatic void find_levels_r(struct block *); 10917683Spst 11017683Spststatic void find_levels(struct block *); 11117683Spststatic void find_dom(struct block *); 11217683Spststatic void propedom(struct edge *); 11317683Spststatic void find_edom(struct block *); 11417683Spststatic void find_closure(struct block *); 11517683Spststatic int atomuse(struct stmt *); 11617683Spststatic int atomdef(struct stmt *); 11717683Spststatic void compute_local_ud(struct block *); 11817683Spststatic void find_ud(struct block *); 11917683Spststatic void init_val(void); 12017683Spststatic int F(int, int, int); 12117683Spststatic inline void vstore(struct stmt *, int *, int, int); 12217683Spststatic void opt_blk(struct block *, int); 12317683Spststatic int use_conflict(struct block *, struct block *); 12417683Spststatic void opt_j(struct edge *); 12517683Spststatic void or_pullup(struct block *); 12617683Spststatic void and_pullup(struct block *); 12717683Spststatic void opt_blks(struct block *, int); 
12817683Spststatic inline void link_inedge(struct edge *, struct block *); 12917683Spststatic void find_inedges(struct block *); 13017683Spststatic void opt_root(struct block **); 13117683Spststatic void opt_loop(struct block *, int); 13217683Spststatic void fold_op(struct stmt *, int, int); 13317683Spststatic inline struct slist *this_op(struct slist *); 13417683Spststatic void opt_not(struct block *); 13517683Spststatic void opt_peep(struct block *); 13617683Spststatic void opt_stmt(struct stmt *, int[], int); 13717683Spststatic void deadstmt(struct stmt *, struct stmt *[]); 13817683Spststatic void opt_deadstores(struct block *); 13917683Spststatic struct block *fold_edge(struct block *, struct edge *); 14017683Spststatic inline int eq_blk(struct block *, struct block *); 14117683Spststatic int slength(struct slist *); 14217683Spststatic int count_blocks(struct block *); 14317683Spststatic void number_blks_r(struct block *); 14417683Spststatic int count_stmts(struct block *); 14517683Spststatic int convert_code_r(struct block *); 14617683Spst#ifdef BDEBUG 14717683Spststatic void opt_dump(struct block *); 14817683Spst#endif 14917683Spst 15017683Spststatic int n_blocks; 15117683Spststruct block **blocks; 15217683Spststatic int n_edges; 15317683Spststruct edge **edges; 15417683Spst 15517683Spst/* 15617683Spst * A bit vector set representation of the dominators. 15717683Spst * We round up the set size to the next power of two. 15817683Spst */ 15917683Spststatic int nodewords; 16017683Spststatic int edgewords; 16117683Spststruct block **levels; 16217683Spstbpf_u_int32 *space; 16317683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 16417683Spst/* 16517683Spst * True if a is in uset {p} 16617683Spst */ 16717683Spst#define SET_MEMBER(p, a) \ 16817683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 16917683Spst 17017683Spst/* 17117683Spst * Add 'a' to uset p. 
17217683Spst */ 17317683Spst#define SET_INSERT(p, a) \ 17417683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 17517683Spst 17617683Spst/* 17717683Spst * Delete 'a' from uset p. 17817683Spst */ 17917683Spst#define SET_DELETE(p, a) \ 18017683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 18117683Spst 18217683Spst/* 18317683Spst * a := a intersect b 18417683Spst */ 18517683Spst#define SET_INTERSECT(a, b, n)\ 18617683Spst{\ 18717683Spst register bpf_u_int32 *_x = a, *_y = b;\ 18817683Spst register int _n = n;\ 18917683Spst while (--_n >= 0) *_x++ &= *_y++;\ 19017683Spst} 19117683Spst 19217683Spst/* 19317683Spst * a := a - b 19417683Spst */ 19517683Spst#define SET_SUBTRACT(a, b, n)\ 19617683Spst{\ 19717683Spst register bpf_u_int32 *_x = a, *_y = b;\ 19817683Spst register int _n = n;\ 19917683Spst while (--_n >= 0) *_x++ &=~ *_y++;\ 20017683Spst} 20117683Spst 20217683Spst/* 20317683Spst * a := a union b 20417683Spst */ 20517683Spst#define SET_UNION(a, b, n)\ 20617683Spst{\ 20717683Spst register bpf_u_int32 *_x = a, *_y = b;\ 20817683Spst register int _n = n;\ 20917683Spst while (--_n >= 0) *_x++ |= *_y++;\ 21017683Spst} 21117683Spst 21217683Spststatic uset all_dom_sets; 21317683Spststatic uset all_closure_sets; 21417683Spststatic uset all_edge_sets; 21517683Spst 21617683Spst#ifndef MAX 21717683Spst#define MAX(a,b) ((a)>(b)?(a):(b)) 21817683Spst#endif 21917683Spst 22017683Spststatic void 22117683Spstfind_levels_r(b) 22217683Spst struct block *b; 22317683Spst{ 22417683Spst int level; 22517683Spst 22617683Spst if (isMarked(b)) 22717683Spst return; 22817683Spst 22917683Spst Mark(b); 23017683Spst b->link = 0; 23117683Spst 23217683Spst if (JT(b)) { 23317683Spst find_levels_r(JT(b)); 23417683Spst find_levels_r(JF(b)); 23517683Spst level = MAX(JT(b)->level, JF(b)->level) + 1; 23617683Spst } else 23717683Spst level = 0; 23817683Spst b->level = level; 23917683Spst b->link = levels[level]; 24017683Spst 
levels[level] = b; 24117683Spst} 24217683Spst 24317683Spst/* 24417683Spst * Level graph. The levels go from 0 at the leaves to 24517683Spst * N_LEVELS at the root. The levels[] array points to the 24617683Spst * first node of the level list, whose elements are linked 24717683Spst * with the 'link' field of the struct block. 24817683Spst */ 24917683Spststatic void 25017683Spstfind_levels(root) 25117683Spst struct block *root; 25217683Spst{ 25317683Spst memset((char *)levels, 0, n_blocks * sizeof(*levels)); 25417683Spst unMarkAll(); 25517683Spst find_levels_r(root); 25617683Spst} 25717683Spst 25817683Spst/* 25917683Spst * Find dominator relationships. 26017683Spst * Assumes graph has been leveled. 26117683Spst */ 26217683Spststatic void 26317683Spstfind_dom(root) 26417683Spst struct block *root; 26517683Spst{ 26617683Spst int i; 26717683Spst struct block *b; 26817683Spst bpf_u_int32 *x; 26917683Spst 27017683Spst /* 27117683Spst * Initialize sets to contain all nodes. 27217683Spst */ 27317683Spst x = all_dom_sets; 27417683Spst i = n_blocks * nodewords; 27517683Spst while (--i >= 0) 27617683Spst *x++ = ~0; 27717683Spst /* Root starts off empty. */ 27817683Spst for (i = nodewords; --i >= 0;) 27917683Spst root->dom[i] = 0; 28017683Spst 28117683Spst /* root->level is the highest level no found. 
*/ 28217683Spst for (i = root->level; i >= 0; --i) { 28317683Spst for (b = levels[i]; b; b = b->link) { 28417683Spst SET_INSERT(b->dom, b->id); 28517683Spst if (JT(b) == 0) 28617683Spst continue; 28717683Spst SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 28817683Spst SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 28917683Spst } 29017683Spst } 29117683Spst} 29217683Spst 29317683Spststatic void 29417683Spstpropedom(ep) 29517683Spst struct edge *ep; 29617683Spst{ 29717683Spst SET_INSERT(ep->edom, ep->id); 29817683Spst if (ep->succ) { 29917683Spst SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 30017683Spst SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 30117683Spst } 30217683Spst} 30317683Spst 30417683Spst/* 30517683Spst * Compute edge dominators. 30617683Spst * Assumes graph has been leveled and predecessors established. 30717683Spst */ 30817683Spststatic void 30917683Spstfind_edom(root) 31017683Spst struct block *root; 31117683Spst{ 31217683Spst int i; 31317683Spst uset x; 31417683Spst struct block *b; 31517683Spst 31617683Spst x = all_edge_sets; 31717683Spst for (i = n_edges * edgewords; --i >= 0; ) 31817683Spst x[i] = ~0; 31917683Spst 32017683Spst /* root->level is the highest level no found. */ 32117683Spst memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 32217683Spst memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 32317683Spst for (i = root->level; i >= 0; --i) { 32417683Spst for (b = levels[i]; b != 0; b = b->link) { 32517683Spst propedom(&b->et); 32617683Spst propedom(&b->ef); 32717683Spst } 32817683Spst } 32917683Spst} 33017683Spst 33117683Spst/* 33217683Spst * Find the backwards transitive closure of the flow graph. These sets 33317683Spst * are backwards in the sense that we find the set of nodes that reach 33417683Spst * a given node, not the set of nodes that can be reached by a node. 33517683Spst * 33617683Spst * Assumes graph has been leveled. 
33717683Spst */ 33817683Spststatic void 33917683Spstfind_closure(root) 34017683Spst struct block *root; 34117683Spst{ 34217683Spst int i; 34317683Spst struct block *b; 34417683Spst 34517683Spst /* 34617683Spst * Initialize sets to contain no nodes. 34717683Spst */ 34817683Spst memset((char *)all_closure_sets, 0, 34917683Spst n_blocks * nodewords * sizeof(*all_closure_sets)); 35017683Spst 35117683Spst /* root->level is the highest level no found. */ 35217683Spst for (i = root->level; i >= 0; --i) { 35317683Spst for (b = levels[i]; b; b = b->link) { 35417683Spst SET_INSERT(b->closure, b->id); 35517683Spst if (JT(b) == 0) 35617683Spst continue; 35717683Spst SET_UNION(JT(b)->closure, b->closure, nodewords); 35817683Spst SET_UNION(JF(b)->closure, b->closure, nodewords); 35917683Spst } 36017683Spst } 36117683Spst} 36217683Spst 36317683Spst/* 36417683Spst * Return the register number that is used by s. If A and X are both 36517683Spst * used, return AX_ATOM. If no register is used, return -1. 36617683Spst * 36717683Spst * The implementation should probably change to an array access. 36817683Spst */ 36917683Spststatic int 37017683Spstatomuse(s) 37117683Spst struct stmt *s; 37217683Spst{ 37317683Spst register int c = s->code; 37417683Spst 37517683Spst if (c == NOP) 37617683Spst return -1; 37717683Spst 37817683Spst switch (BPF_CLASS(c)) { 37917683Spst 38017683Spst case BPF_RET: 38117683Spst return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 38217683Spst (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 38317683Spst 38417683Spst case BPF_LD: 38517683Spst case BPF_LDX: 38617683Spst return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 38717683Spst (BPF_MODE(c) == BPF_MEM) ? 
s->k : -1; 38817683Spst 38917683Spst case BPF_ST: 39017683Spst return A_ATOM; 39117683Spst 39217683Spst case BPF_STX: 39317683Spst return X_ATOM; 39417683Spst 39517683Spst case BPF_JMP: 39617683Spst case BPF_ALU: 39717683Spst if (BPF_SRC(c) == BPF_X) 39817683Spst return AX_ATOM; 39917683Spst return A_ATOM; 40017683Spst 40117683Spst case BPF_MISC: 40217683Spst return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 40317683Spst } 40417683Spst abort(); 40517683Spst /* NOTREACHED */ 40617683Spst} 40717683Spst 40817683Spst/* 40917683Spst * Return the register number that is defined by 's'. We assume that 41017683Spst * a single stmt cannot define more than one register. If no register 41117683Spst * is defined, return -1. 41217683Spst * 41317683Spst * The implementation should probably change to an array access. 41417683Spst */ 41517683Spststatic int 41617683Spstatomdef(s) 41717683Spst struct stmt *s; 41817683Spst{ 41917683Spst if (s->code == NOP) 42017683Spst return -1; 42117683Spst 42217683Spst switch (BPF_CLASS(s->code)) { 42317683Spst 42417683Spst case BPF_LD: 42517683Spst case BPF_ALU: 42617683Spst return A_ATOM; 42717683Spst 42817683Spst case BPF_LDX: 42917683Spst return X_ATOM; 43017683Spst 43117683Spst case BPF_ST: 43217683Spst case BPF_STX: 43317683Spst return s->k; 43417683Spst 43517683Spst case BPF_MISC: 43617683Spst return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 43717683Spst } 43817683Spst return -1; 43917683Spst} 44017683Spst 441146768Ssam/* 442146768Ssam * Compute the sets of registers used, defined, and killed by 'b'. 443146768Ssam * 444146768Ssam * "Used" means that a statement in 'b' uses the register before any 445146768Ssam * statement in 'b' defines it, i.e. it uses the value left in 446146768Ssam * that register by a predecessor block of this block. 447146768Ssam * "Defined" means that a statement in 'b' defines it. 448146768Ssam * "Killed" means that a statement in 'b' defines it before any 449146768Ssam * statement in 'b' uses it, i.e. 
it kills the value left in that 450146768Ssam * register by a predecessor block of this block. 451146768Ssam */ 45217683Spststatic void 45317683Spstcompute_local_ud(b) 45417683Spst struct block *b; 45517683Spst{ 45617683Spst struct slist *s; 45717683Spst atomset def = 0, use = 0, kill = 0; 45817683Spst int atom; 45917683Spst 46017683Spst for (s = b->stmts; s; s = s->next) { 46117683Spst if (s->s.code == NOP) 46217683Spst continue; 46317683Spst atom = atomuse(&s->s); 46417683Spst if (atom >= 0) { 46517683Spst if (atom == AX_ATOM) { 46617683Spst if (!ATOMELEM(def, X_ATOM)) 46717683Spst use |= ATOMMASK(X_ATOM); 46817683Spst if (!ATOMELEM(def, A_ATOM)) 46917683Spst use |= ATOMMASK(A_ATOM); 47017683Spst } 47117683Spst else if (atom < N_ATOMS) { 47217683Spst if (!ATOMELEM(def, atom)) 47317683Spst use |= ATOMMASK(atom); 47417683Spst } 47517683Spst else 47617683Spst abort(); 47717683Spst } 47817683Spst atom = atomdef(&s->s); 47917683Spst if (atom >= 0) { 48017683Spst if (!ATOMELEM(use, atom)) 48117683Spst kill |= ATOMMASK(atom); 48217683Spst def |= ATOMMASK(atom); 48317683Spst } 48417683Spst } 485146768Ssam if (BPF_CLASS(b->s.code) == BPF_JMP) { 486146768Ssam /* 487146768Ssam * XXX - what about RET? 488146768Ssam */ 489146768Ssam atom = atomuse(&b->s); 490146768Ssam if (atom >= 0) { 491146768Ssam if (atom == AX_ATOM) { 492146768Ssam if (!ATOMELEM(def, X_ATOM)) 493146768Ssam use |= ATOMMASK(X_ATOM); 494146768Ssam if (!ATOMELEM(def, A_ATOM)) 495146768Ssam use |= ATOMMASK(A_ATOM); 496146768Ssam } 497146768Ssam else if (atom < N_ATOMS) { 498146768Ssam if (!ATOMELEM(def, atom)) 499146768Ssam use |= ATOMMASK(atom); 500146768Ssam } 501146768Ssam else 502146768Ssam abort(); 503146768Ssam } 504146768Ssam } 50517683Spst 50617683Spst b->def = def; 50717683Spst b->kill = kill; 50817683Spst b->in_use = use; 50917683Spst} 51017683Spst 51117683Spst/* 51217683Spst * Assume graph is already leveled. 
51317683Spst */ 51417683Spststatic void 51517683Spstfind_ud(root) 51617683Spst struct block *root; 51717683Spst{ 51817683Spst int i, maxlevel; 51917683Spst struct block *p; 52017683Spst 52117683Spst /* 52217683Spst * root->level is the highest level no found; 52317683Spst * count down from there. 52417683Spst */ 52517683Spst maxlevel = root->level; 52617683Spst for (i = maxlevel; i >= 0; --i) 52717683Spst for (p = levels[i]; p; p = p->link) { 52817683Spst compute_local_ud(p); 52917683Spst p->out_use = 0; 53017683Spst } 53117683Spst 53217683Spst for (i = 1; i <= maxlevel; ++i) { 53317683Spst for (p = levels[i]; p; p = p->link) { 53417683Spst p->out_use |= JT(p)->in_use | JF(p)->in_use; 53517683Spst p->in_use |= p->out_use &~ p->kill; 53617683Spst } 53717683Spst } 53817683Spst} 53917683Spst 54017683Spst/* 54117683Spst * These data structures are used in a Cocke and Shwarz style 54217683Spst * value numbering scheme. Since the flowgraph is acyclic, 54317683Spst * exit values can be propagated from a node's predecessors 54417683Spst * provided it is uniquely defined. 54517683Spst */ 54617683Spststruct valnode { 54717683Spst int code; 54817683Spst int v0, v1; 54917683Spst int val; 55017683Spst struct valnode *next; 55117683Spst}; 55217683Spst 55317683Spst#define MODULUS 213 55417683Spststatic struct valnode *hashtbl[MODULUS]; 55517683Spststatic int curval; 55617683Spststatic int maxval; 55717683Spst 55817683Spst/* Integer constants mapped with the load immediate opcode. 
*/ 55917683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 56017683Spst 56117683Spststruct vmapinfo { 56217683Spst int is_const; 56317683Spst bpf_int32 const_val; 56417683Spst}; 56517683Spst 56617683Spststruct vmapinfo *vmap; 56717683Spststruct valnode *vnode_base; 56817683Spststruct valnode *next_vnode; 56917683Spst 57017683Spststatic void 57117683Spstinit_val() 57217683Spst{ 57317683Spst curval = 0; 57417683Spst next_vnode = vnode_base; 57517683Spst memset((char *)vmap, 0, maxval * sizeof(*vmap)); 57617683Spst memset((char *)hashtbl, 0, sizeof hashtbl); 57717683Spst} 57817683Spst 57917683Spst/* Because we really don't have an IR, this stuff is a little messy. */ 58017683Spststatic int 58117683SpstF(code, v0, v1) 58217683Spst int code; 58317683Spst int v0, v1; 58417683Spst{ 58517683Spst u_int hash; 58617683Spst int val; 58717683Spst struct valnode *p; 58817683Spst 58917683Spst hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 59017683Spst hash %= MODULUS; 59117683Spst 59217683Spst for (p = hashtbl[hash]; p; p = p->next) 59317683Spst if (p->code == code && p->v0 == v0 && p->v1 == v1) 59417683Spst return p->val; 59517683Spst 59617683Spst val = ++curval; 59717683Spst if (BPF_MODE(code) == BPF_IMM && 59817683Spst (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 59917683Spst vmap[val].const_val = v0; 60017683Spst vmap[val].is_const = 1; 60117683Spst } 60217683Spst p = next_vnode++; 60317683Spst p->val = val; 60417683Spst p->code = code; 60517683Spst p->v0 = v0; 60617683Spst p->v1 = v1; 60717683Spst p->next = hashtbl[hash]; 60817683Spst hashtbl[hash] = p; 60917683Spst 61017683Spst return val; 61117683Spst} 61217683Spst 61317683Spststatic inline void 61417683Spstvstore(s, valp, newval, alter) 61517683Spst struct stmt *s; 61617683Spst int *valp; 61717683Spst int newval; 61817683Spst int alter; 61917683Spst{ 62017683Spst if (alter && *valp == newval) 62117683Spst s->code = NOP; 62217683Spst else 62317683Spst *valp = newval; 62417683Spst} 62517683Spst 
62617683Spststatic void 62717683Spstfold_op(s, v0, v1) 62817683Spst struct stmt *s; 62917683Spst int v0, v1; 63017683Spst{ 631172677Smlaier bpf_u_int32 a, b; 63217683Spst 63317683Spst a = vmap[v0].const_val; 63417683Spst b = vmap[v1].const_val; 63517683Spst 63617683Spst switch (BPF_OP(s->code)) { 63717683Spst case BPF_ADD: 63817683Spst a += b; 63917683Spst break; 64017683Spst 64117683Spst case BPF_SUB: 64217683Spst a -= b; 64317683Spst break; 64417683Spst 64517683Spst case BPF_MUL: 64617683Spst a *= b; 64717683Spst break; 64817683Spst 64917683Spst case BPF_DIV: 65017683Spst if (b == 0) 65117683Spst bpf_error("division by zero"); 65217683Spst a /= b; 65317683Spst break; 65417683Spst 65517683Spst case BPF_AND: 65617683Spst a &= b; 65717683Spst break; 65817683Spst 65917683Spst case BPF_OR: 66017683Spst a |= b; 66117683Spst break; 66217683Spst 66317683Spst case BPF_LSH: 66417683Spst a <<= b; 66517683Spst break; 66617683Spst 66717683Spst case BPF_RSH: 66817683Spst a >>= b; 66917683Spst break; 67017683Spst 67117683Spst case BPF_NEG: 67217683Spst a = -a; 67317683Spst break; 67417683Spst 67517683Spst default: 67617683Spst abort(); 67717683Spst } 67817683Spst s->k = a; 67917683Spst s->code = BPF_LD|BPF_IMM; 68017683Spst done = 0; 68117683Spst} 68217683Spst 68317683Spststatic inline struct slist * 68417683Spstthis_op(s) 68517683Spst struct slist *s; 68617683Spst{ 68717683Spst while (s != 0 && s->s.code == NOP) 68817683Spst s = s->next; 68917683Spst return s; 69017683Spst} 69117683Spst 69217683Spststatic void 69317683Spstopt_not(b) 69417683Spst struct block *b; 69517683Spst{ 69617683Spst struct block *tmp = JT(b); 69717683Spst 69817683Spst JT(b) = JF(b); 69917683Spst JF(b) = tmp; 70017683Spst} 70117683Spst 70217683Spststatic void 70317683Spstopt_peep(b) 70417683Spst struct block *b; 70517683Spst{ 70617683Spst struct slist *s; 70717683Spst struct slist *next, *last; 70817683Spst int val; 70917683Spst 71017683Spst s = b->stmts; 71117683Spst if (s == 0) 71217683Spst return; 
71317683Spst 71417683Spst last = s; 715146768Ssam for (/*empty*/; /*empty*/; s = next) { 716146768Ssam /* 717146768Ssam * Skip over nops. 718146768Ssam */ 71917683Spst s = this_op(s); 72017683Spst if (s == 0) 721146768Ssam break; /* nothing left in the block */ 722146768Ssam 723146768Ssam /* 724146768Ssam * Find the next real instruction after that one 725146768Ssam * (skipping nops). 726146768Ssam */ 72717683Spst next = this_op(s->next); 72817683Spst if (next == 0) 729146768Ssam break; /* no next instruction */ 73017683Spst last = next; 73117683Spst 73217683Spst /* 73317683Spst * st M[k] --> st M[k] 73417683Spst * ldx M[k] tax 73517683Spst */ 73617683Spst if (s->s.code == BPF_ST && 73717683Spst next->s.code == (BPF_LDX|BPF_MEM) && 73817683Spst s->s.k == next->s.k) { 73917683Spst done = 0; 74017683Spst next->s.code = BPF_MISC|BPF_TAX; 74117683Spst } 74217683Spst /* 74317683Spst * ld #k --> ldx #k 74417683Spst * tax txa 74517683Spst */ 74617683Spst if (s->s.code == (BPF_LD|BPF_IMM) && 74717683Spst next->s.code == (BPF_MISC|BPF_TAX)) { 74817683Spst s->s.code = BPF_LDX|BPF_IMM; 74917683Spst next->s.code = BPF_MISC|BPF_TXA; 75017683Spst done = 0; 75117683Spst } 75217683Spst /* 75317683Spst * This is an ugly special case, but it happens 75417683Spst * when you say tcp[k] or udp[k] where k is a constant. 75517683Spst */ 75617683Spst if (s->s.code == (BPF_LD|BPF_IMM)) { 75717683Spst struct slist *add, *tax, *ild; 75817683Spst 75917683Spst /* 76017683Spst * Check that X isn't used on exit from this 76117683Spst * block (which the optimizer might cause). 76217683Spst * We know the code generator won't generate 76317683Spst * any local dependencies. 76417683Spst */ 76517683Spst if (ATOMELEM(b->out_use, X_ATOM)) 766146768Ssam continue; 76717683Spst 768146768Ssam /* 769146768Ssam * Check that the instruction following the ldi 770146768Ssam * is an addx, or it's an ldxms with an addx 771146768Ssam * following it (with 0 or more nops between the 772146768Ssam * ldxms and addx). 
773146768Ssam */ 77417683Spst if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 77517683Spst add = next; 77617683Spst else 77717683Spst add = this_op(next->next); 77817683Spst if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 779146768Ssam continue; 78017683Spst 781146768Ssam /* 782146768Ssam * Check that a tax follows that (with 0 or more 783146768Ssam * nops between them). 784146768Ssam */ 78517683Spst tax = this_op(add->next); 78617683Spst if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 787146768Ssam continue; 78817683Spst 789146768Ssam /* 790146768Ssam * Check that an ild follows that (with 0 or more 791146768Ssam * nops between them). 792146768Ssam */ 79317683Spst ild = this_op(tax->next); 79417683Spst if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 79517683Spst BPF_MODE(ild->s.code) != BPF_IND) 796146768Ssam continue; 79717683Spst /* 79817683Spst * We want to turn this sequence: 79917683Spst * 80017683Spst * (004) ldi #0x2 {s} 80117683Spst * (005) ldxms [14] {next} -- optional 80217683Spst * (006) addx {add} 80317683Spst * (007) tax {tax} 80417683Spst * (008) ild [x+0] {ild} 80517683Spst * 80617683Spst * into this sequence: 80717683Spst * 80817683Spst * (004) nop 80917683Spst * (005) ldxms [14] 81017683Spst * (006) nop 81117683Spst * (007) nop 81217683Spst * (008) ild [x+2] 81317683Spst * 814146768Ssam * XXX We need to check that X is not 815146768Ssam * subsequently used, because we want to change 816146768Ssam * what'll be in it after this sequence. 817146768Ssam * 818146768Ssam * We know we can eliminate the accumulator 819146768Ssam * modifications earlier in the sequence since 820146768Ssam * it is defined by the last stmt of this sequence 821146768Ssam * (i.e., the last statement of the sequence loads 822146768Ssam * a value into the accumulator, so we can eliminate 823146768Ssam * earlier operations on the accumulator). 
82417683Spst */ 82517683Spst ild->s.k += s->s.k; 82617683Spst s->s.code = NOP; 82717683Spst add->s.code = NOP; 82817683Spst tax->s.code = NOP; 82917683Spst done = 0; 83017683Spst } 83117683Spst } 83217683Spst /* 833146768Ssam * If the comparison at the end of a block is an equality 834146768Ssam * comparison against a constant, and nobody uses the value 835146768Ssam * we leave in the A register at the end of a block, and 836146768Ssam * the operation preceding the comparison is an arithmetic 837146768Ssam * operation, we can sometime optimize it away. 83817683Spst */ 839146768Ssam if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 840146768Ssam !ATOMELEM(b->out_use, A_ATOM)) { 841146768Ssam /* 842146768Ssam * We can optimize away certain subtractions of the 843146768Ssam * X register. 844146768Ssam */ 845146768Ssam if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 846127664Sbms val = b->val[X_ATOM]; 847127664Sbms if (vmap[val].is_const) { 848127664Sbms /* 849146768Ssam * If we have a subtract to do a comparison, 850146768Ssam * and the X register is a known constant, 851146768Ssam * we can merge this value into the 852146768Ssam * comparison: 853146768Ssam * 854127664Sbms * sub x -> nop 855127664Sbms * jeq #y jeq #(x+y) 856127664Sbms */ 857127664Sbms b->s.k += vmap[val].const_val; 858127664Sbms last->s.code = NOP; 859127664Sbms done = 0; 860127664Sbms } else if (b->s.k == 0) { 861127664Sbms /* 862146768Ssam * If the X register isn't a constant, 863146768Ssam * and the comparison in the test is 864146768Ssam * against 0, we can compare with the 865146768Ssam * X register, instead: 866146768Ssam * 867146768Ssam * sub x -> nop 868146768Ssam * jeq #0 jeq x 869127664Sbms */ 870127664Sbms last->s.code = NOP; 871146768Ssam b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 872127664Sbms done = 0; 873127664Sbms } 874127664Sbms } 875127664Sbms /* 876146768Ssam * Likewise, a constant subtract can be simplified: 877146768Ssam * 878146768Ssam * sub #x -> nop 879146768Ssam * jeq #y -> jeq #(x+y) 
880127664Sbms */ 881146768Ssam else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 88217683Spst last->s.code = NOP; 883127664Sbms b->s.k += last->s.k; 88417683Spst done = 0; 88517683Spst } 886146768Ssam /* 887146768Ssam * And, similarly, a constant AND can be simplified 888146768Ssam * if we're testing against 0, i.e.: 889146768Ssam * 890146768Ssam * and #k nop 891146768Ssam * jeq #0 -> jset #k 892146768Ssam */ 893146768Ssam else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 894146768Ssam b->s.k == 0) { 895146768Ssam b->s.k = last->s.k; 896146768Ssam b->s.code = BPF_JMP|BPF_K|BPF_JSET; 897146768Ssam last->s.code = NOP; 898146768Ssam done = 0; 899146768Ssam opt_not(b); 900146768Ssam } 90117683Spst } 90217683Spst /* 903127664Sbms * jset #0 -> never 904127664Sbms * jset #ffffffff -> always 905127664Sbms */ 906127664Sbms if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 907127664Sbms if (b->s.k == 0) 908127664Sbms JT(b) = JF(b); 909127664Sbms if (b->s.k == 0xffffffff) 910127664Sbms JF(b) = JT(b); 911127664Sbms } 912127664Sbms /* 913190225Srpaulo * If we're comparing against the index register, and the index 914190225Srpaulo * register is a known constant, we can just compare against that 915190225Srpaulo * constant. 916190225Srpaulo */ 917190225Srpaulo val = b->val[X_ATOM]; 918190225Srpaulo if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 919190225Srpaulo bpf_int32 v = vmap[val].const_val; 920190225Srpaulo b->s.code &= ~BPF_X; 921190225Srpaulo b->s.k = v; 922190225Srpaulo } 923190225Srpaulo /* 92417683Spst * If the accumulator is a known constant, we can compute the 92517683Spst * comparison result. 
92617683Spst */ 92717683Spst val = b->val[A_ATOM]; 92817683Spst if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 92917683Spst bpf_int32 v = vmap[val].const_val; 93017683Spst switch (BPF_OP(b->s.code)) { 93117683Spst 93217683Spst case BPF_JEQ: 93317683Spst v = v == b->s.k; 93417683Spst break; 93517683Spst 93617683Spst case BPF_JGT: 93717683Spst v = (unsigned)v > b->s.k; 93817683Spst break; 93917683Spst 94017683Spst case BPF_JGE: 94117683Spst v = (unsigned)v >= b->s.k; 94217683Spst break; 94317683Spst 94417683Spst case BPF_JSET: 94517683Spst v &= b->s.k; 94617683Spst break; 94717683Spst 94817683Spst default: 94917683Spst abort(); 95017683Spst } 95117683Spst if (JF(b) != JT(b)) 95217683Spst done = 0; 95317683Spst if (v) 95417683Spst JF(b) = JT(b); 95517683Spst else 95617683Spst JT(b) = JF(b); 95717683Spst } 95817683Spst} 95917683Spst 96017683Spst/* 96117683Spst * Compute the symbolic value of expression of 's', and update 96217683Spst * anything it defines in the value table 'val'. If 'alter' is true, 96317683Spst * do various optimizations. This code would be cleaner if symbolic 96417683Spst * evaluation and code transformations weren't folded together. 
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		/* Absolute load: value depends only on the opcode and offset. */
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			/*
			 * The index register holds a known constant, so
			 * fold it into the offset and convert the indirect
			 * load into an absolute load.
			 */
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			/* Constant accumulator: fold the negation at
			 * compile time and emit a plain immediate load. */
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					/* A op 0 == A: the instruction is a no-op. */
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					/* A op 0 == 0: becomes a load of 0. */
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				/* Both operands known: constant-fold. */
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				/* Both A and X known: constant-fold. */
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				/* X known: strength-reduce to the immediate
				 * (BPF_K) form of the same operation. */
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				/* 0 + X == X, 0 | X == X: becomes TXA. */
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				/* 0 op X == 0: becomes a load of 0. */
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				/* NOTE(review): op here comes from the
				 * ADD..RSH cases above, so BPF_NEG looks
				 * unreachable — confirm before relying
				 * on this arm. */
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			/* Scratch slot holds a known constant: load it
			 * directly as an immediate. */
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

/*
 * Track, per atom, the most recent statement that defines it; when a
 * definition is overwritten before any use, mark the earlier definition
 * as a NOP (dead store).  'last[atom]' is the pending (not-yet-used)
 * definition of that atom, or 0 if none.
 */
static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		/* A use makes the pending definition live: clear it. */
		if (atom == AX_ATOM) {
			/* AX_ATOM means the statement uses both A and X. */
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			/* Previous definition was never used: kill it. */
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

/*
 * Eliminate dead stores within block 'b': definitions that are
 * overwritten before use, plus trailing definitions of atoms that are
 * not live on exit from the block (per b->out_use).
 */
static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	/* Any definition still pending at block end is dead unless the
	 * atom is used by some successor. */
	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

/*
 * Optimize a single basic block: compute entry values from the
 * predecessors, symbolically evaluate (and, if do_stmts, rewrite) each
 * statement, then either drop the whole statement list or run the
 * peephole and dead-store passes, and finally record the operand values
 * used by the branch optimizer.
 */
static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	/* Remember the entry values of A and X so we can tell below
	 * whether the block actually changed them. */
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	/* The false edge carries the negated opcode so fold_edge() can
	 * distinguish the sense of the branch. */
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

/*
 * Given that the branch along edge 'ep' has already been decided, try
 * to decide the branch in 'child' (which performs the same test on the
 * same value).  Return the successor block the edge can be promoted to,
 * or 0 if the outcome cannot be determined.
 */
static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		/* Negative code: this is the false edge (see opt_blk). */
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}

/*
 * Try to retarget edge 'ep': skip over successors whose branches can be
 * decided from facts known along this edge (common targets, or tests
 * already decided by an edge dominator).
 */
static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to the its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		/* Walk the set bits of this word of the edge-dominator set. */
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


/*
 * Pull up through a chain of blocks that all make the same test as 'b'
 * and branch true to the same place ("or" chains), moving the first
 * block whose A-value matches to the front of the chain so the test can
 * be shared.
 */
static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/* Walk down the false chain past blocks whose A-value matches. */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/* Now find, further down, a block whose A-value matches again. */
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * Mirror image of or_pullup(): operate on chains linked through the
 * false branch ("and" chains).
 */
static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * One pass over the whole flow graph: optimize each block (deepest
 * level first), then, unless do_stmts is set, run the branch-movement
 * and pull-up passes.
 */
static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;

	find_inedges(root);
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}

	/* opt_j() may have rewired edges; rebuild the predecessor lists
	 * before the pull-up passes. */
	find_inedges(root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

/* Prepend edge 'parent' to the in-edge list of block 'child'. */
static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

/* Rebuild every block's in_edges list from the current graph. */
static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

/*
 * Clean up the root: skip leading degenerate jumps (both branches the
 * same), carrying the original statement list along, and drop all
 * statements if the root is a return.
 */
static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

/*
 * Run the analysis and optimization passes repeatedly until a full
 * iteration makes no change (the passes clear the global 'done' flag
 * whenever they modify anything).
 */
static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(root);
	}
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
			opt_dump(root);
		}
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
174517683Spst */ 174617683Spstvoid 174717683Spstbpf_optimize(rootp) 174817683Spst struct block **rootp; 174917683Spst{ 175017683Spst struct block *root; 175117683Spst 175217683Spst root = *rootp; 175317683Spst 175417683Spst opt_init(root); 175517683Spst opt_loop(root, 0); 175617683Spst opt_loop(root, 1); 175717683Spst intern_blocks(root); 175898530Sfenner#ifdef BDEBUG 175998530Sfenner if (dflag > 1) { 176098530Sfenner printf("after intern_blocks()\n"); 176198530Sfenner opt_dump(root); 176298530Sfenner } 176398530Sfenner#endif 176417683Spst opt_root(rootp); 176598530Sfenner#ifdef BDEBUG 176698530Sfenner if (dflag > 1) { 176798530Sfenner printf("after opt_root()\n"); 176898530Sfenner opt_dump(root); 176998530Sfenner } 177098530Sfenner#endif 177117683Spst opt_cleanup(); 177217683Spst} 177317683Spst 177417683Spststatic void 177517683Spstmake_marks(p) 177617683Spst struct block *p; 177717683Spst{ 177817683Spst if (!isMarked(p)) { 177917683Spst Mark(p); 178017683Spst if (BPF_CLASS(p->s.code) != BPF_RET) { 178117683Spst make_marks(JT(p)); 178217683Spst make_marks(JF(p)); 178317683Spst } 178417683Spst } 178517683Spst} 178617683Spst 178717683Spst/* 178817683Spst * Mark code array such that isMarked(i) is true 178917683Spst * only for nodes that are alive. 179017683Spst */ 179117683Spststatic void 179217683Spstmark_code(p) 179317683Spst struct block *p; 179417683Spst{ 179517683Spst cur_mark += 1; 179617683Spst make_marks(p); 179717683Spst} 179817683Spst 179917683Spst/* 180017683Spst * True iff the two stmt lists load the same value from the packet into 180117683Spst * the accumulator. 
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		/* Skip NOPs on both sides; they don't affect behavior. */
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

/*
 * True iff blocks 'b0' and 'b1' are behaviorally identical: same
 * terminating test, same successors, and equivalent statement lists.
 */
static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

/*
 * Merge equivalent blocks: for each live block, point 'link' at an
 * equivalent block later in the array (if any), then redirect every
 * jump to the chosen representative.  Repeat until no edge changes,
 * since merging can create new equivalences.
 */
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	/* Only blocks reachable from the root participate. */
	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				/* Chase one level of 'link' so chains of
				 * equivalent blocks collapse to a single
				 * representative. */
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			/* Return blocks have no successors to rewrite. */
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

/*
 * Release all memory allocated by opt_init().
 */
static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	/* NOPs are deleted instructions; don't count them. */
	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
190817683Spst */ 190917683Spststatic int 191017683Spstcount_blocks(p) 191117683Spst struct block *p; 191217683Spst{ 191317683Spst if (p == 0 || isMarked(p)) 191417683Spst return 0; 191517683Spst Mark(p); 191617683Spst return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 191717683Spst} 191817683Spst 191917683Spst/* 192017683Spst * Do a depth first search on the flow graph, numbering the 192117683Spst * the basic blocks, and entering them into the 'blocks' array.` 192217683Spst */ 192317683Spststatic void 192417683Spstnumber_blks_r(p) 192517683Spst struct block *p; 192617683Spst{ 192717683Spst int n; 192817683Spst 192917683Spst if (p == 0 || isMarked(p)) 193017683Spst return; 193117683Spst 193217683Spst Mark(p); 193317683Spst n = n_blocks++; 193417683Spst p->id = n; 193517683Spst blocks[n] = p; 193617683Spst 193717683Spst number_blks_r(JT(p)); 193817683Spst number_blks_r(JF(p)); 193917683Spst} 194017683Spst 194117683Spst/* 194217683Spst * Return the number of stmts in the flowgraph reachable by 'p'. 194317683Spst * The nodes should be unmarked before calling. 194475107Sfenner * 194575107Sfenner * Note that "stmts" means "instructions", and that this includes 194675107Sfenner * 194775107Sfenner * side-effect statements in 'p' (slength(p->stmts)); 194875107Sfenner * 194975107Sfenner * statements in the true branch from 'p' (count_stmts(JT(p))); 195075107Sfenner * 195175107Sfenner * statements in the false branch from 'p' (count_stmts(JF(p))); 195275107Sfenner * 195375107Sfenner * the conditional jump itself (1); 195475107Sfenner * 195575107Sfenner * an extra long jump if the true branch requires it (p->longjt); 195675107Sfenner * 195775107Sfenner * an extra long jump if the false branch requires it (p->longjf). 
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	/* Statements in this block, its own jump, and any long jumps. */
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);
	/*
	 * Both count_blocks() and number_blks_r() walk exactly the
	 * reachable, unmarked graph, so after this point n == n_blocks;
	 * the code below uses the two interchangeably.
	 */

	/* Every block has exactly two outgoing edges (true and false). */
	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

	/* Size of each bit set, in bpf_u_int32 words, rounded up. */
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	/*
	 * One arena holds every bit set: two per-node sets (dom and
	 * closure) for each block, plus one per-edge set (edom) for
	 * each edge.  'p' carves the arena up below.
	 */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		/* True edges occupy ids [0, n_blocks); false edges follow. */
		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	/* The +1 accounts for each block's terminating jump. */
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code,
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	/*
	 * Convert both successors first: blocks are emitted back to
	 * front (ftail moves downward), so successors must already
	 * have their final offsets before this block's jumps are
	 * resolved.
	 */
	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	/* Reserve room for the stmts, the jump, and any long jumps. */
	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	/*
	 * offset[i] maps an emitted-instruction index back to its
	 * slist node, so block-local jump targets can be located by
	 * pointer comparison below.
	 */
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Scan offset[] for the true and false targets; each
		 * must match exactly one emitted instruction.  The
		 * stored jump offsets are relative to the *next*
		 * instruction, hence the "- off - 1".
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	/* Emit the block's terminating conditional jump. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		/*
		 * 'off' is unsigned, so this check catches both
		 * forward offsets that do not fit in the 8-bit jt/jf
		 * fields and (via wraparound) any negative result —
		 * NOTE(review): backward jumps appear to be excluded
		 * here too; confirm intended.
		 */
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return(0);
			}
			/* branch if T to following jump */
			dst->jt = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return(0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
2239172677Smlaier * 2240172677Smlaier * If it appears that icode_to_fcode() is leaking, the problem is that 2241172677Smlaier * the program using pcap_compile() is failing to free the memory in 2242172677Smlaier * the BPF program when it's done - the leak is in the program, not in 2243172677Smlaier * the routine that happens to be allocating the memory. (By analogy, if 2244172677Smlaier * a program calls fopen() without ever calling fclose() on the FILE *, 2245172677Smlaier * it will leak the FILE structure; the leak is not in fopen(), it's in 2246172677Smlaier * the program.) Change the program to use pcap_freecode() when it's 2247172677Smlaier * done with the filter program. See the pcap man page. 224817683Spst */ 224917683Spststruct bpf_insn * 225017683Spsticode_to_fcode(root, lenp) 225117683Spst struct block *root; 225217683Spst int *lenp; 225317683Spst{ 225417683Spst int n; 225517683Spst struct bpf_insn *fp; 225617683Spst 225717683Spst /* 225898530Sfenner * Loop doing convert_code_r() until no branches remain 225917683Spst * with too-large offsets. 226017683Spst */ 226117683Spst while (1) { 226217683Spst unMarkAll(); 226317683Spst n = *lenp = count_stmts(root); 2264127664Sbms 226517683Spst fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2266127664Sbms if (fp == NULL) 2267127664Sbms bpf_error("malloc"); 226817683Spst memset((char *)fp, 0, sizeof(*fp) * n); 226917683Spst fstart = fp; 227017683Spst ftail = fp + n; 2271127664Sbms 227217683Spst unMarkAll(); 227317683Spst if (convert_code_r(root)) 227417683Spst break; 227517683Spst free(fp); 227617683Spst } 227717683Spst 227817683Spst return fp; 227917683Spst} 228017683Spst 228175107Sfenner/* 228275107Sfenner * Make a copy of a BPF program and put it in the "fcode" member of 228375107Sfenner * a "pcap_t". 
228475107Sfenner * 228575107Sfenner * If we fail to allocate memory for the copy, fill in the "errbuf" 228675107Sfenner * member of the "pcap_t" with an error message, and return -1; 228775107Sfenner * otherwise, return 0. 228875107Sfenner */ 228975107Sfennerint 229075107Sfennerinstall_bpf_program(pcap_t *p, struct bpf_program *fp) 229175107Sfenner{ 229275107Sfenner size_t prog_size; 229375107Sfenner 229475107Sfenner /* 2295190225Srpaulo * Validate the program. 2296190225Srpaulo */ 2297190225Srpaulo if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2298190225Srpaulo snprintf(p->errbuf, sizeof(p->errbuf), 2299190225Srpaulo "BPF program is not valid"); 2300190225Srpaulo return (-1); 2301190225Srpaulo } 2302190225Srpaulo 2303190225Srpaulo /* 230475107Sfenner * Free up any already installed program. 230575107Sfenner */ 230675107Sfenner pcap_freecode(&p->fcode); 230775107Sfenner 230875107Sfenner prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 230975107Sfenner p->fcode.bf_len = fp->bf_len; 231075107Sfenner p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 231175107Sfenner if (p->fcode.bf_insns == NULL) { 231275107Sfenner snprintf(p->errbuf, sizeof(p->errbuf), 231375107Sfenner "malloc: %s", pcap_strerror(errno)); 231475107Sfenner return (-1); 231575107Sfenner } 231675107Sfenner memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 231775107Sfenner return (0); 231875107Sfenner} 231975107Sfenner 232017683Spst#ifdef BDEBUG 232117683Spststatic void 232217683Spstopt_dump(root) 232317683Spst struct block *root; 232417683Spst{ 232517683Spst struct bpf_program f; 232617683Spst 232717683Spst memset(bids, 0, sizeof bids); 232817683Spst f.bf_insns = icode_to_fcode(root, &f.bf_len); 232917683Spst bpf_dump(&f, 1); 233017683Spst putchar('\n'); 233117683Spst free((char *)f.bf_insns); 233217683Spst} 233317683Spst#endif 2334