/* optimize.c — revision 127664 */
117683Spst/* 217683Spst * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 317683Spst * The Regents of the University of California. All rights reserved. 417683Spst * 517683Spst * Redistribution and use in source and binary forms, with or without 617683Spst * modification, are permitted provided that: (1) source code distributions 717683Spst * retain the above copyright notice and this paragraph in its entirety, (2) 817683Spst * distributions including binary code include the above copyright notice and 917683Spst * this paragraph in its entirety in the documentation or other materials 1017683Spst * provided with the distribution, and (3) all advertising materials mentioning 1117683Spst * features or use of this software display the following acknowledgement: 1217683Spst * ``This product includes software developed by the University of California, 1317683Spst * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 1417683Spst * the University nor the names of its contributors may be used to endorse 1517683Spst * or promote products derived from this software without specific prior 1617683Spst * written permission. 1717683Spst * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 1817683Spst * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 1917683Spst * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 2017683Spst * 2117683Spst * Optimization module for tcpdump intermediate representation. 
2217683Spst */ 2317683Spst#ifndef lint 24127664Sbmsstatic const char rcsid[] _U_ = 25127664Sbms "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.76.2.3 2003/12/22 00:26:36 guy Exp $ (LBL)"; 2617683Spst#endif 2717683Spst 2875107Sfenner#ifdef HAVE_CONFIG_H 2975107Sfenner#include "config.h" 3075107Sfenner#endif 3175107Sfenner 3217683Spst#include <stdio.h> 3317683Spst#include <stdlib.h> 3417683Spst#include <memory.h> 3517683Spst 3675107Sfenner#include <errno.h> 3775107Sfenner 3817683Spst#include "pcap-int.h" 3917683Spst 4017683Spst#include "gencode.h" 4117683Spst 4217683Spst#ifdef HAVE_OS_PROTO_H 4317683Spst#include "os-proto.h" 4417683Spst#endif 4517683Spst 4617683Spst#ifdef BDEBUG 4717683Spstextern int dflag; 4817683Spst#endif 4917683Spst 5017683Spst#define A_ATOM BPF_MEMWORDS 5117683Spst#define X_ATOM (BPF_MEMWORDS+1) 5217683Spst 5317683Spst#define NOP -1 5417683Spst 5517683Spst/* 5617683Spst * This define is used to represent *both* the accumulator and 5717683Spst * x register in use-def computations. 5817683Spst * Currently, the use-def code assumes only one definition per instruction. 5917683Spst */ 6017683Spst#define AX_ATOM N_ATOMS 6117683Spst 6217683Spst/* 6317683Spst * A flag to indicate that further optimization is needed. 6417683Spst * Iterative passes are continued until a given pass yields no 6517683Spst * branch movement. 6617683Spst */ 6717683Spststatic int done; 6817683Spst 6917683Spst/* 7017683Spst * A block is marked if only if its mark equals the current mark. 7117683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is 7217683Spst * incremented. This automatically makes each element unmarked. 
7317683Spst */ 7417683Spststatic int cur_mark; 7517683Spst#define isMarked(p) ((p)->mark == cur_mark) 7617683Spst#define unMarkAll() cur_mark += 1 7717683Spst#define Mark(p) ((p)->mark = cur_mark) 7817683Spst 7917683Spststatic void opt_init(struct block *); 8017683Spststatic void opt_cleanup(void); 8117683Spst 8217683Spststatic void make_marks(struct block *); 8317683Spststatic void mark_code(struct block *); 8417683Spst 8517683Spststatic void intern_blocks(struct block *); 8617683Spst 8717683Spststatic int eq_slist(struct slist *, struct slist *); 8817683Spst 8917683Spststatic void find_levels_r(struct block *); 9017683Spst 9117683Spststatic void find_levels(struct block *); 9217683Spststatic void find_dom(struct block *); 9317683Spststatic void propedom(struct edge *); 9417683Spststatic void find_edom(struct block *); 9517683Spststatic void find_closure(struct block *); 9617683Spststatic int atomuse(struct stmt *); 9717683Spststatic int atomdef(struct stmt *); 9817683Spststatic void compute_local_ud(struct block *); 9917683Spststatic void find_ud(struct block *); 10017683Spststatic void init_val(void); 10117683Spststatic int F(int, int, int); 10217683Spststatic inline void vstore(struct stmt *, int *, int, int); 10317683Spststatic void opt_blk(struct block *, int); 10417683Spststatic int use_conflict(struct block *, struct block *); 10517683Spststatic void opt_j(struct edge *); 10617683Spststatic void or_pullup(struct block *); 10717683Spststatic void and_pullup(struct block *); 10817683Spststatic void opt_blks(struct block *, int); 10917683Spststatic inline void link_inedge(struct edge *, struct block *); 11017683Spststatic void find_inedges(struct block *); 11117683Spststatic void opt_root(struct block **); 11217683Spststatic void opt_loop(struct block *, int); 11317683Spststatic void fold_op(struct stmt *, int, int); 11417683Spststatic inline struct slist *this_op(struct slist *); 11517683Spststatic void opt_not(struct block *); 11617683Spststatic void 
opt_peep(struct block *); 11717683Spststatic void opt_stmt(struct stmt *, int[], int); 11817683Spststatic void deadstmt(struct stmt *, struct stmt *[]); 11917683Spststatic void opt_deadstores(struct block *); 12017683Spststatic struct block *fold_edge(struct block *, struct edge *); 12117683Spststatic inline int eq_blk(struct block *, struct block *); 12217683Spststatic int slength(struct slist *); 12317683Spststatic int count_blocks(struct block *); 12417683Spststatic void number_blks_r(struct block *); 12517683Spststatic int count_stmts(struct block *); 12617683Spststatic int convert_code_r(struct block *); 12717683Spst#ifdef BDEBUG 12817683Spststatic void opt_dump(struct block *); 12917683Spst#endif 13017683Spst 13117683Spststatic int n_blocks; 13217683Spststruct block **blocks; 13317683Spststatic int n_edges; 13417683Spststruct edge **edges; 13517683Spst 13617683Spst/* 13717683Spst * A bit vector set representation of the dominators. 13817683Spst * We round up the set size to the next power of two. 13917683Spst */ 14017683Spststatic int nodewords; 14117683Spststatic int edgewords; 14217683Spststruct block **levels; 14317683Spstbpf_u_int32 *space; 14417683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 14517683Spst/* 14617683Spst * True if a is in uset {p} 14717683Spst */ 14817683Spst#define SET_MEMBER(p, a) \ 14917683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 15017683Spst 15117683Spst/* 15217683Spst * Add 'a' to uset p. 15317683Spst */ 15417683Spst#define SET_INSERT(p, a) \ 15517683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 15617683Spst 15717683Spst/* 15817683Spst * Delete 'a' from uset p. 
15917683Spst */ 16017683Spst#define SET_DELETE(p, a) \ 16117683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 16217683Spst 16317683Spst/* 16417683Spst * a := a intersect b 16517683Spst */ 16617683Spst#define SET_INTERSECT(a, b, n)\ 16717683Spst{\ 16817683Spst register bpf_u_int32 *_x = a, *_y = b;\ 16917683Spst register int _n = n;\ 17017683Spst while (--_n >= 0) *_x++ &= *_y++;\ 17117683Spst} 17217683Spst 17317683Spst/* 17417683Spst * a := a - b 17517683Spst */ 17617683Spst#define SET_SUBTRACT(a, b, n)\ 17717683Spst{\ 17817683Spst register bpf_u_int32 *_x = a, *_y = b;\ 17917683Spst register int _n = n;\ 18017683Spst while (--_n >= 0) *_x++ &=~ *_y++;\ 18117683Spst} 18217683Spst 18317683Spst/* 18417683Spst * a := a union b 18517683Spst */ 18617683Spst#define SET_UNION(a, b, n)\ 18717683Spst{\ 18817683Spst register bpf_u_int32 *_x = a, *_y = b;\ 18917683Spst register int _n = n;\ 19017683Spst while (--_n >= 0) *_x++ |= *_y++;\ 19117683Spst} 19217683Spst 19317683Spststatic uset all_dom_sets; 19417683Spststatic uset all_closure_sets; 19517683Spststatic uset all_edge_sets; 19617683Spst 19717683Spst#ifndef MAX 19817683Spst#define MAX(a,b) ((a)>(b)?(a):(b)) 19917683Spst#endif 20017683Spst 20117683Spststatic void 20217683Spstfind_levels_r(b) 20317683Spst struct block *b; 20417683Spst{ 20517683Spst int level; 20617683Spst 20717683Spst if (isMarked(b)) 20817683Spst return; 20917683Spst 21017683Spst Mark(b); 21117683Spst b->link = 0; 21217683Spst 21317683Spst if (JT(b)) { 21417683Spst find_levels_r(JT(b)); 21517683Spst find_levels_r(JF(b)); 21617683Spst level = MAX(JT(b)->level, JF(b)->level) + 1; 21717683Spst } else 21817683Spst level = 0; 21917683Spst b->level = level; 22017683Spst b->link = levels[level]; 22117683Spst levels[level] = b; 22217683Spst} 22317683Spst 22417683Spst/* 22517683Spst * Level graph. The levels go from 0 at the leaves to 22617683Spst * N_LEVELS at the root. 
The levels[] array points to the 22717683Spst * first node of the level list, whose elements are linked 22817683Spst * with the 'link' field of the struct block. 22917683Spst */ 23017683Spststatic void 23117683Spstfind_levels(root) 23217683Spst struct block *root; 23317683Spst{ 23417683Spst memset((char *)levels, 0, n_blocks * sizeof(*levels)); 23517683Spst unMarkAll(); 23617683Spst find_levels_r(root); 23717683Spst} 23817683Spst 23917683Spst/* 24017683Spst * Find dominator relationships. 24117683Spst * Assumes graph has been leveled. 24217683Spst */ 24317683Spststatic void 24417683Spstfind_dom(root) 24517683Spst struct block *root; 24617683Spst{ 24717683Spst int i; 24817683Spst struct block *b; 24917683Spst bpf_u_int32 *x; 25017683Spst 25117683Spst /* 25217683Spst * Initialize sets to contain all nodes. 25317683Spst */ 25417683Spst x = all_dom_sets; 25517683Spst i = n_blocks * nodewords; 25617683Spst while (--i >= 0) 25717683Spst *x++ = ~0; 25817683Spst /* Root starts off empty. */ 25917683Spst for (i = nodewords; --i >= 0;) 26017683Spst root->dom[i] = 0; 26117683Spst 26217683Spst /* root->level is the highest level no found. */ 26317683Spst for (i = root->level; i >= 0; --i) { 26417683Spst for (b = levels[i]; b; b = b->link) { 26517683Spst SET_INSERT(b->dom, b->id); 26617683Spst if (JT(b) == 0) 26717683Spst continue; 26817683Spst SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 26917683Spst SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 27017683Spst } 27117683Spst } 27217683Spst} 27317683Spst 27417683Spststatic void 27517683Spstpropedom(ep) 27617683Spst struct edge *ep; 27717683Spst{ 27817683Spst SET_INSERT(ep->edom, ep->id); 27917683Spst if (ep->succ) { 28017683Spst SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 28117683Spst SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 28217683Spst } 28317683Spst} 28417683Spst 28517683Spst/* 28617683Spst * Compute edge dominators. 28717683Spst * Assumes graph has been leveled and predecessors established. 
28817683Spst */ 28917683Spststatic void 29017683Spstfind_edom(root) 29117683Spst struct block *root; 29217683Spst{ 29317683Spst int i; 29417683Spst uset x; 29517683Spst struct block *b; 29617683Spst 29717683Spst x = all_edge_sets; 29817683Spst for (i = n_edges * edgewords; --i >= 0; ) 29917683Spst x[i] = ~0; 30017683Spst 30117683Spst /* root->level is the highest level no found. */ 30217683Spst memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 30317683Spst memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 30417683Spst for (i = root->level; i >= 0; --i) { 30517683Spst for (b = levels[i]; b != 0; b = b->link) { 30617683Spst propedom(&b->et); 30717683Spst propedom(&b->ef); 30817683Spst } 30917683Spst } 31017683Spst} 31117683Spst 31217683Spst/* 31317683Spst * Find the backwards transitive closure of the flow graph. These sets 31417683Spst * are backwards in the sense that we find the set of nodes that reach 31517683Spst * a given node, not the set of nodes that can be reached by a node. 31617683Spst * 31717683Spst * Assumes graph has been leveled. 31817683Spst */ 31917683Spststatic void 32017683Spstfind_closure(root) 32117683Spst struct block *root; 32217683Spst{ 32317683Spst int i; 32417683Spst struct block *b; 32517683Spst 32617683Spst /* 32717683Spst * Initialize sets to contain no nodes. 32817683Spst */ 32917683Spst memset((char *)all_closure_sets, 0, 33017683Spst n_blocks * nodewords * sizeof(*all_closure_sets)); 33117683Spst 33217683Spst /* root->level is the highest level no found. */ 33317683Spst for (i = root->level; i >= 0; --i) { 33417683Spst for (b = levels[i]; b; b = b->link) { 33517683Spst SET_INSERT(b->closure, b->id); 33617683Spst if (JT(b) == 0) 33717683Spst continue; 33817683Spst SET_UNION(JT(b)->closure, b->closure, nodewords); 33917683Spst SET_UNION(JF(b)->closure, b->closure, nodewords); 34017683Spst } 34117683Spst } 34217683Spst} 34317683Spst 34417683Spst/* 34517683Spst * Return the register number that is used by s. 
If A and X are both 34617683Spst * used, return AX_ATOM. If no register is used, return -1. 34717683Spst * 34817683Spst * The implementation should probably change to an array access. 34917683Spst */ 35017683Spststatic int 35117683Spstatomuse(s) 35217683Spst struct stmt *s; 35317683Spst{ 35417683Spst register int c = s->code; 35517683Spst 35617683Spst if (c == NOP) 35717683Spst return -1; 35817683Spst 35917683Spst switch (BPF_CLASS(c)) { 36017683Spst 36117683Spst case BPF_RET: 36217683Spst return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 36317683Spst (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 36417683Spst 36517683Spst case BPF_LD: 36617683Spst case BPF_LDX: 36717683Spst return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 36817683Spst (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 36917683Spst 37017683Spst case BPF_ST: 37117683Spst return A_ATOM; 37217683Spst 37317683Spst case BPF_STX: 37417683Spst return X_ATOM; 37517683Spst 37617683Spst case BPF_JMP: 37717683Spst case BPF_ALU: 37817683Spst if (BPF_SRC(c) == BPF_X) 37917683Spst return AX_ATOM; 38017683Spst return A_ATOM; 38117683Spst 38217683Spst case BPF_MISC: 38317683Spst return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 38417683Spst } 38517683Spst abort(); 38617683Spst /* NOTREACHED */ 38717683Spst} 38817683Spst 38917683Spst/* 39017683Spst * Return the register number that is defined by 's'. We assume that 39117683Spst * a single stmt cannot define more than one register. If no register 39217683Spst * is defined, return -1. 39317683Spst * 39417683Spst * The implementation should probably change to an array access. 
39517683Spst */ 39617683Spststatic int 39717683Spstatomdef(s) 39817683Spst struct stmt *s; 39917683Spst{ 40017683Spst if (s->code == NOP) 40117683Spst return -1; 40217683Spst 40317683Spst switch (BPF_CLASS(s->code)) { 40417683Spst 40517683Spst case BPF_LD: 40617683Spst case BPF_ALU: 40717683Spst return A_ATOM; 40817683Spst 40917683Spst case BPF_LDX: 41017683Spst return X_ATOM; 41117683Spst 41217683Spst case BPF_ST: 41317683Spst case BPF_STX: 41417683Spst return s->k; 41517683Spst 41617683Spst case BPF_MISC: 41717683Spst return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 41817683Spst } 41917683Spst return -1; 42017683Spst} 42117683Spst 42217683Spststatic void 42317683Spstcompute_local_ud(b) 42417683Spst struct block *b; 42517683Spst{ 42617683Spst struct slist *s; 42717683Spst atomset def = 0, use = 0, kill = 0; 42817683Spst int atom; 42917683Spst 43017683Spst for (s = b->stmts; s; s = s->next) { 43117683Spst if (s->s.code == NOP) 43217683Spst continue; 43317683Spst atom = atomuse(&s->s); 43417683Spst if (atom >= 0) { 43517683Spst if (atom == AX_ATOM) { 43617683Spst if (!ATOMELEM(def, X_ATOM)) 43717683Spst use |= ATOMMASK(X_ATOM); 43817683Spst if (!ATOMELEM(def, A_ATOM)) 43917683Spst use |= ATOMMASK(A_ATOM); 44017683Spst } 44117683Spst else if (atom < N_ATOMS) { 44217683Spst if (!ATOMELEM(def, atom)) 44317683Spst use |= ATOMMASK(atom); 44417683Spst } 44517683Spst else 44617683Spst abort(); 44717683Spst } 44817683Spst atom = atomdef(&s->s); 44917683Spst if (atom >= 0) { 45017683Spst if (!ATOMELEM(use, atom)) 45117683Spst kill |= ATOMMASK(atom); 45217683Spst def |= ATOMMASK(atom); 45317683Spst } 45417683Spst } 45517683Spst if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP) 45617683Spst use |= ATOMMASK(A_ATOM); 45717683Spst 45817683Spst b->def = def; 45917683Spst b->kill = kill; 46017683Spst b->in_use = use; 46117683Spst} 46217683Spst 46317683Spst/* 46417683Spst * Assume graph is already leveled. 
46517683Spst */ 46617683Spststatic void 46717683Spstfind_ud(root) 46817683Spst struct block *root; 46917683Spst{ 47017683Spst int i, maxlevel; 47117683Spst struct block *p; 47217683Spst 47317683Spst /* 47417683Spst * root->level is the highest level no found; 47517683Spst * count down from there. 47617683Spst */ 47717683Spst maxlevel = root->level; 47817683Spst for (i = maxlevel; i >= 0; --i) 47917683Spst for (p = levels[i]; p; p = p->link) { 48017683Spst compute_local_ud(p); 48117683Spst p->out_use = 0; 48217683Spst } 48317683Spst 48417683Spst for (i = 1; i <= maxlevel; ++i) { 48517683Spst for (p = levels[i]; p; p = p->link) { 48617683Spst p->out_use |= JT(p)->in_use | JF(p)->in_use; 48717683Spst p->in_use |= p->out_use &~ p->kill; 48817683Spst } 48917683Spst } 49017683Spst} 49117683Spst 49217683Spst/* 49317683Spst * These data structures are used in a Cocke and Shwarz style 49417683Spst * value numbering scheme. Since the flowgraph is acyclic, 49517683Spst * exit values can be propagated from a node's predecessors 49617683Spst * provided it is uniquely defined. 49717683Spst */ 49817683Spststruct valnode { 49917683Spst int code; 50017683Spst int v0, v1; 50117683Spst int val; 50217683Spst struct valnode *next; 50317683Spst}; 50417683Spst 50517683Spst#define MODULUS 213 50617683Spststatic struct valnode *hashtbl[MODULUS]; 50717683Spststatic int curval; 50817683Spststatic int maxval; 50917683Spst 51017683Spst/* Integer constants mapped with the load immediate opcode. 
*/ 51117683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 51217683Spst 51317683Spststruct vmapinfo { 51417683Spst int is_const; 51517683Spst bpf_int32 const_val; 51617683Spst}; 51717683Spst 51817683Spststruct vmapinfo *vmap; 51917683Spststruct valnode *vnode_base; 52017683Spststruct valnode *next_vnode; 52117683Spst 52217683Spststatic void 52317683Spstinit_val() 52417683Spst{ 52517683Spst curval = 0; 52617683Spst next_vnode = vnode_base; 52717683Spst memset((char *)vmap, 0, maxval * sizeof(*vmap)); 52817683Spst memset((char *)hashtbl, 0, sizeof hashtbl); 52917683Spst} 53017683Spst 53117683Spst/* Because we really don't have an IR, this stuff is a little messy. */ 53217683Spststatic int 53317683SpstF(code, v0, v1) 53417683Spst int code; 53517683Spst int v0, v1; 53617683Spst{ 53717683Spst u_int hash; 53817683Spst int val; 53917683Spst struct valnode *p; 54017683Spst 54117683Spst hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 54217683Spst hash %= MODULUS; 54317683Spst 54417683Spst for (p = hashtbl[hash]; p; p = p->next) 54517683Spst if (p->code == code && p->v0 == v0 && p->v1 == v1) 54617683Spst return p->val; 54717683Spst 54817683Spst val = ++curval; 54917683Spst if (BPF_MODE(code) == BPF_IMM && 55017683Spst (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 55117683Spst vmap[val].const_val = v0; 55217683Spst vmap[val].is_const = 1; 55317683Spst } 55417683Spst p = next_vnode++; 55517683Spst p->val = val; 55617683Spst p->code = code; 55717683Spst p->v0 = v0; 55817683Spst p->v1 = v1; 55917683Spst p->next = hashtbl[hash]; 56017683Spst hashtbl[hash] = p; 56117683Spst 56217683Spst return val; 56317683Spst} 56417683Spst 56517683Spststatic inline void 56617683Spstvstore(s, valp, newval, alter) 56717683Spst struct stmt *s; 56817683Spst int *valp; 56917683Spst int newval; 57017683Spst int alter; 57117683Spst{ 57217683Spst if (alter && *valp == newval) 57317683Spst s->code = NOP; 57417683Spst else 57517683Spst *valp = newval; 57617683Spst} 57717683Spst 
57817683Spststatic void 57917683Spstfold_op(s, v0, v1) 58017683Spst struct stmt *s; 58117683Spst int v0, v1; 58217683Spst{ 58317683Spst bpf_int32 a, b; 58417683Spst 58517683Spst a = vmap[v0].const_val; 58617683Spst b = vmap[v1].const_val; 58717683Spst 58817683Spst switch (BPF_OP(s->code)) { 58917683Spst case BPF_ADD: 59017683Spst a += b; 59117683Spst break; 59217683Spst 59317683Spst case BPF_SUB: 59417683Spst a -= b; 59517683Spst break; 59617683Spst 59717683Spst case BPF_MUL: 59817683Spst a *= b; 59917683Spst break; 60017683Spst 60117683Spst case BPF_DIV: 60217683Spst if (b == 0) 60317683Spst bpf_error("division by zero"); 60417683Spst a /= b; 60517683Spst break; 60617683Spst 60717683Spst case BPF_AND: 60817683Spst a &= b; 60917683Spst break; 61017683Spst 61117683Spst case BPF_OR: 61217683Spst a |= b; 61317683Spst break; 61417683Spst 61517683Spst case BPF_LSH: 61617683Spst a <<= b; 61717683Spst break; 61817683Spst 61917683Spst case BPF_RSH: 62017683Spst a >>= b; 62117683Spst break; 62217683Spst 62317683Spst case BPF_NEG: 62417683Spst a = -a; 62517683Spst break; 62617683Spst 62717683Spst default: 62817683Spst abort(); 62917683Spst } 63017683Spst s->k = a; 63117683Spst s->code = BPF_LD|BPF_IMM; 63217683Spst done = 0; 63317683Spst} 63417683Spst 63517683Spststatic inline struct slist * 63617683Spstthis_op(s) 63717683Spst struct slist *s; 63817683Spst{ 63917683Spst while (s != 0 && s->s.code == NOP) 64017683Spst s = s->next; 64117683Spst return s; 64217683Spst} 64317683Spst 64417683Spststatic void 64517683Spstopt_not(b) 64617683Spst struct block *b; 64717683Spst{ 64817683Spst struct block *tmp = JT(b); 64917683Spst 65017683Spst JT(b) = JF(b); 65117683Spst JF(b) = tmp; 65217683Spst} 65317683Spst 65417683Spststatic void 65517683Spstopt_peep(b) 65617683Spst struct block *b; 65717683Spst{ 65817683Spst struct slist *s; 65917683Spst struct slist *next, *last; 66017683Spst int val; 66117683Spst 66217683Spst s = b->stmts; 66317683Spst if (s == 0) 66417683Spst return; 
66517683Spst 66617683Spst last = s; 66717683Spst while (1) { 66817683Spst s = this_op(s); 66917683Spst if (s == 0) 67017683Spst break; 67117683Spst next = this_op(s->next); 67217683Spst if (next == 0) 67317683Spst break; 67417683Spst last = next; 67517683Spst 67617683Spst /* 67717683Spst * st M[k] --> st M[k] 67817683Spst * ldx M[k] tax 67917683Spst */ 68017683Spst if (s->s.code == BPF_ST && 68117683Spst next->s.code == (BPF_LDX|BPF_MEM) && 68217683Spst s->s.k == next->s.k) { 68317683Spst done = 0; 68417683Spst next->s.code = BPF_MISC|BPF_TAX; 68517683Spst } 68617683Spst /* 68717683Spst * ld #k --> ldx #k 68817683Spst * tax txa 68917683Spst */ 69017683Spst if (s->s.code == (BPF_LD|BPF_IMM) && 69117683Spst next->s.code == (BPF_MISC|BPF_TAX)) { 69217683Spst s->s.code = BPF_LDX|BPF_IMM; 69317683Spst next->s.code = BPF_MISC|BPF_TXA; 69417683Spst done = 0; 69517683Spst } 69617683Spst /* 69717683Spst * This is an ugly special case, but it happens 69817683Spst * when you say tcp[k] or udp[k] where k is a constant. 69917683Spst */ 70017683Spst if (s->s.code == (BPF_LD|BPF_IMM)) { 70117683Spst struct slist *add, *tax, *ild; 70217683Spst 70317683Spst /* 70417683Spst * Check that X isn't used on exit from this 70517683Spst * block (which the optimizer might cause). 70617683Spst * We know the code generator won't generate 70717683Spst * any local dependencies. 
70817683Spst */ 70917683Spst if (ATOMELEM(b->out_use, X_ATOM)) 71017683Spst break; 71117683Spst 71217683Spst if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 71317683Spst add = next; 71417683Spst else 71517683Spst add = this_op(next->next); 71617683Spst if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 71717683Spst break; 71817683Spst 71917683Spst tax = this_op(add->next); 72017683Spst if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 72117683Spst break; 72217683Spst 72317683Spst ild = this_op(tax->next); 72417683Spst if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 72517683Spst BPF_MODE(ild->s.code) != BPF_IND) 72617683Spst break; 72717683Spst /* 72817683Spst * XXX We need to check that X is not 72917683Spst * subsequently used. We know we can eliminate the 73017683Spst * accumulator modifications since it is defined 73117683Spst * by the last stmt of this sequence. 73217683Spst * 73317683Spst * We want to turn this sequence: 73417683Spst * 73517683Spst * (004) ldi #0x2 {s} 73617683Spst * (005) ldxms [14] {next} -- optional 73717683Spst * (006) addx {add} 73817683Spst * (007) tax {tax} 73917683Spst * (008) ild [x+0] {ild} 74017683Spst * 74117683Spst * into this sequence: 74217683Spst * 74317683Spst * (004) nop 74417683Spst * (005) ldxms [14] 74517683Spst * (006) nop 74617683Spst * (007) nop 74717683Spst * (008) ild [x+2] 74817683Spst * 74917683Spst */ 75017683Spst ild->s.k += s->s.k; 75117683Spst s->s.code = NOP; 75217683Spst add->s.code = NOP; 75317683Spst tax->s.code = NOP; 75417683Spst done = 0; 75517683Spst } 75617683Spst s = next; 75717683Spst } 75817683Spst /* 75917683Spst * If we have a subtract to do a comparison, and the X register 76017683Spst * is a known constant, we can merge this value into the 76117683Spst * comparison. 
76217683Spst */ 763127664Sbms if (BPF_OP(b->s.code) == BPF_JEQ) { 764127664Sbms if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) && 765127664Sbms !ATOMELEM(b->out_use, A_ATOM)) { 766127664Sbms val = b->val[X_ATOM]; 767127664Sbms if (vmap[val].is_const) { 768127664Sbms /* 769127664Sbms * sub x -> nop 770127664Sbms * jeq #y jeq #(x+y) 771127664Sbms */ 772127664Sbms b->s.k += vmap[val].const_val; 773127664Sbms last->s.code = NOP; 774127664Sbms done = 0; 775127664Sbms } else if (b->s.k == 0) { 776127664Sbms /* 777127664Sbms * sub #x -> nop 778127664Sbms * jeq #0 jeq #x 779127664Sbms */ 780127664Sbms last->s.code = NOP; 781127664Sbms b->s.code = BPF_CLASS(b->s.code) | 782127664Sbms BPF_OP(b->s.code) | BPF_X; 783127664Sbms done = 0; 784127664Sbms } 785127664Sbms } 786127664Sbms /* 787127664Sbms * Likewise, a constant subtract can be simplified. 788127664Sbms */ 789127664Sbms else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) && 790127664Sbms !ATOMELEM(b->out_use, A_ATOM)) { 79117683Spst 79217683Spst last->s.code = NOP; 793127664Sbms b->s.k += last->s.k; 79417683Spst done = 0; 79517683Spst } 79617683Spst } 79717683Spst /* 79817683Spst * and #k nop 79917683Spst * jeq #0 -> jset #k 80017683Spst */ 80117683Spst if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 80217683Spst !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) { 80317683Spst b->s.k = last->s.k; 80417683Spst b->s.code = BPF_JMP|BPF_K|BPF_JSET; 80517683Spst last->s.code = NOP; 80617683Spst done = 0; 80717683Spst opt_not(b); 80817683Spst } 80917683Spst /* 810127664Sbms * jset #0 -> never 811127664Sbms * jset #ffffffff -> always 812127664Sbms */ 813127664Sbms if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 814127664Sbms if (b->s.k == 0) 815127664Sbms JT(b) = JF(b); 816127664Sbms if (b->s.k == 0xffffffff) 817127664Sbms JF(b) = JT(b); 818127664Sbms } 819127664Sbms /* 82017683Spst * If the accumulator is a known constant, we can compute the 82117683Spst * comparison result. 
82217683Spst */ 82317683Spst val = b->val[A_ATOM]; 82417683Spst if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 82517683Spst bpf_int32 v = vmap[val].const_val; 82617683Spst switch (BPF_OP(b->s.code)) { 82717683Spst 82817683Spst case BPF_JEQ: 82917683Spst v = v == b->s.k; 83017683Spst break; 83117683Spst 83217683Spst case BPF_JGT: 83317683Spst v = (unsigned)v > b->s.k; 83417683Spst break; 83517683Spst 83617683Spst case BPF_JGE: 83717683Spst v = (unsigned)v >= b->s.k; 83817683Spst break; 83917683Spst 84017683Spst case BPF_JSET: 84117683Spst v &= b->s.k; 84217683Spst break; 84317683Spst 84417683Spst default: 84517683Spst abort(); 84617683Spst } 84717683Spst if (JF(b) != JT(b)) 84817683Spst done = 0; 84917683Spst if (v) 85017683Spst JF(b) = JT(b); 85117683Spst else 85217683Spst JT(b) = JF(b); 85317683Spst } 85417683Spst} 85517683Spst 85617683Spst/* 85717683Spst * Compute the symbolic value of expression of 's', and update 85817683Spst * anything it defines in the value table 'val'. If 'alter' is true, 85917683Spst * do various optimizations. This code would be cleaner if symbolic 86017683Spst * evaluation and code transformations weren't folded together. 
86117683Spst */ 86217683Spststatic void 86317683Spstopt_stmt(s, val, alter) 86417683Spst struct stmt *s; 86517683Spst int val[]; 86617683Spst int alter; 86717683Spst{ 86817683Spst int op; 86917683Spst int v; 87017683Spst 87117683Spst switch (s->code) { 87217683Spst 87317683Spst case BPF_LD|BPF_ABS|BPF_W: 87417683Spst case BPF_LD|BPF_ABS|BPF_H: 87517683Spst case BPF_LD|BPF_ABS|BPF_B: 87617683Spst v = F(s->code, s->k, 0L); 87717683Spst vstore(s, &val[A_ATOM], v, alter); 87817683Spst break; 87917683Spst 88017683Spst case BPF_LD|BPF_IND|BPF_W: 88117683Spst case BPF_LD|BPF_IND|BPF_H: 88217683Spst case BPF_LD|BPF_IND|BPF_B: 88317683Spst v = val[X_ATOM]; 88417683Spst if (alter && vmap[v].is_const) { 88517683Spst s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 88617683Spst s->k += vmap[v].const_val; 88717683Spst v = F(s->code, s->k, 0L); 88817683Spst done = 0; 88917683Spst } 89017683Spst else 89117683Spst v = F(s->code, s->k, v); 89217683Spst vstore(s, &val[A_ATOM], v, alter); 89317683Spst break; 89417683Spst 89517683Spst case BPF_LD|BPF_LEN: 89617683Spst v = F(s->code, 0L, 0L); 89717683Spst vstore(s, &val[A_ATOM], v, alter); 89817683Spst break; 89917683Spst 90017683Spst case BPF_LD|BPF_IMM: 90117683Spst v = K(s->k); 90217683Spst vstore(s, &val[A_ATOM], v, alter); 90317683Spst break; 90417683Spst 90517683Spst case BPF_LDX|BPF_IMM: 90617683Spst v = K(s->k); 90717683Spst vstore(s, &val[X_ATOM], v, alter); 90817683Spst break; 90917683Spst 91017683Spst case BPF_LDX|BPF_MSH|BPF_B: 91117683Spst v = F(s->code, s->k, 0L); 91217683Spst vstore(s, &val[X_ATOM], v, alter); 91317683Spst break; 91417683Spst 91517683Spst case BPF_ALU|BPF_NEG: 91617683Spst if (alter && vmap[val[A_ATOM]].is_const) { 91717683Spst s->code = BPF_LD|BPF_IMM; 91817683Spst s->k = -vmap[val[A_ATOM]].const_val; 91917683Spst val[A_ATOM] = K(s->k); 92017683Spst } 92117683Spst else 92217683Spst val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 92317683Spst break; 92417683Spst 92517683Spst case BPF_ALU|BPF_ADD|BPF_K: 
92617683Spst case BPF_ALU|BPF_SUB|BPF_K: 92717683Spst case BPF_ALU|BPF_MUL|BPF_K: 92817683Spst case BPF_ALU|BPF_DIV|BPF_K: 92917683Spst case BPF_ALU|BPF_AND|BPF_K: 93017683Spst case BPF_ALU|BPF_OR|BPF_K: 93117683Spst case BPF_ALU|BPF_LSH|BPF_K: 93217683Spst case BPF_ALU|BPF_RSH|BPF_K: 93317683Spst op = BPF_OP(s->code); 93417683Spst if (alter) { 93517683Spst if (s->k == 0) { 93698530Sfenner /* don't optimize away "sub #0" 93798530Sfenner * as it may be needed later to 93898530Sfenner * fixup the generated math code */ 93998530Sfenner if (op == BPF_ADD || 94017683Spst op == BPF_LSH || op == BPF_RSH || 94117683Spst op == BPF_OR) { 94217683Spst s->code = NOP; 94317683Spst break; 94417683Spst } 94517683Spst if (op == BPF_MUL || op == BPF_AND) { 94617683Spst s->code = BPF_LD|BPF_IMM; 94717683Spst val[A_ATOM] = K(s->k); 94817683Spst break; 94917683Spst } 95017683Spst } 95117683Spst if (vmap[val[A_ATOM]].is_const) { 95217683Spst fold_op(s, val[A_ATOM], K(s->k)); 95317683Spst val[A_ATOM] = K(s->k); 95417683Spst break; 95517683Spst } 95617683Spst } 95717683Spst val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 95817683Spst break; 95917683Spst 96017683Spst case BPF_ALU|BPF_ADD|BPF_X: 96117683Spst case BPF_ALU|BPF_SUB|BPF_X: 96217683Spst case BPF_ALU|BPF_MUL|BPF_X: 96317683Spst case BPF_ALU|BPF_DIV|BPF_X: 96417683Spst case BPF_ALU|BPF_AND|BPF_X: 96517683Spst case BPF_ALU|BPF_OR|BPF_X: 96617683Spst case BPF_ALU|BPF_LSH|BPF_X: 96717683Spst case BPF_ALU|BPF_RSH|BPF_X: 96817683Spst op = BPF_OP(s->code); 96917683Spst if (alter && vmap[val[X_ATOM]].is_const) { 97017683Spst if (vmap[val[A_ATOM]].is_const) { 97117683Spst fold_op(s, val[A_ATOM], val[X_ATOM]); 97217683Spst val[A_ATOM] = K(s->k); 97317683Spst } 97417683Spst else { 97517683Spst s->code = BPF_ALU|BPF_K|op; 97617683Spst s->k = vmap[val[X_ATOM]].const_val; 97717683Spst done = 0; 97817683Spst val[A_ATOM] = 97917683Spst F(s->code, val[A_ATOM], K(s->k)); 98017683Spst } 98117683Spst break; 98217683Spst } 98317683Spst /* 
98417683Spst * Check if we're doing something to an accumulator 98517683Spst * that is 0, and simplify. This may not seem like 98617683Spst * much of a simplification but it could open up further 98717683Spst * optimizations. 988127664Sbms * XXX We could also check for mul by 1, etc. 98917683Spst */ 99017683Spst if (alter && vmap[val[A_ATOM]].is_const 99117683Spst && vmap[val[A_ATOM]].const_val == 0) { 992127664Sbms if (op == BPF_ADD || op == BPF_OR) { 99317683Spst s->code = BPF_MISC|BPF_TXA; 99417683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 99517683Spst break; 99617683Spst } 99717683Spst else if (op == BPF_MUL || op == BPF_DIV || 998127664Sbms op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 99917683Spst s->code = BPF_LD|BPF_IMM; 100017683Spst s->k = 0; 100117683Spst vstore(s, &val[A_ATOM], K(s->k), alter); 100217683Spst break; 100317683Spst } 100417683Spst else if (op == BPF_NEG) { 100517683Spst s->code = NOP; 100617683Spst break; 100717683Spst } 100817683Spst } 100917683Spst val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 101017683Spst break; 101117683Spst 101217683Spst case BPF_MISC|BPF_TXA: 101317683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 101417683Spst break; 101517683Spst 101617683Spst case BPF_LD|BPF_MEM: 101717683Spst v = val[s->k]; 101817683Spst if (alter && vmap[v].is_const) { 101917683Spst s->code = BPF_LD|BPF_IMM; 102017683Spst s->k = vmap[v].const_val; 102117683Spst done = 0; 102217683Spst } 102317683Spst vstore(s, &val[A_ATOM], v, alter); 102417683Spst break; 102517683Spst 102617683Spst case BPF_MISC|BPF_TAX: 102717683Spst vstore(s, &val[X_ATOM], val[A_ATOM], alter); 102817683Spst break; 102917683Spst 103017683Spst case BPF_LDX|BPF_MEM: 103117683Spst v = val[s->k]; 103217683Spst if (alter && vmap[v].is_const) { 103317683Spst s->code = BPF_LDX|BPF_IMM; 103417683Spst s->k = vmap[v].const_val; 103517683Spst done = 0; 103617683Spst } 103717683Spst vstore(s, &val[X_ATOM], v, alter); 103817683Spst break; 103917683Spst 104017683Spst 
case BPF_ST: 104117683Spst vstore(s, &val[s->k], val[A_ATOM], alter); 104217683Spst break; 104317683Spst 104417683Spst case BPF_STX: 104517683Spst vstore(s, &val[s->k], val[X_ATOM], alter); 104617683Spst break; 104717683Spst } 104817683Spst} 104917683Spst 105017683Spststatic void 105117683Spstdeadstmt(s, last) 105217683Spst register struct stmt *s; 105317683Spst register struct stmt *last[]; 105417683Spst{ 105517683Spst register int atom; 105617683Spst 105717683Spst atom = atomuse(s); 105817683Spst if (atom >= 0) { 105917683Spst if (atom == AX_ATOM) { 106017683Spst last[X_ATOM] = 0; 106117683Spst last[A_ATOM] = 0; 106217683Spst } 106317683Spst else 106417683Spst last[atom] = 0; 106517683Spst } 106617683Spst atom = atomdef(s); 106717683Spst if (atom >= 0) { 106817683Spst if (last[atom]) { 106917683Spst done = 0; 107017683Spst last[atom]->code = NOP; 107117683Spst } 107217683Spst last[atom] = s; 107317683Spst } 107417683Spst} 107517683Spst 107617683Spststatic void 107717683Spstopt_deadstores(b) 107817683Spst register struct block *b; 107917683Spst{ 108017683Spst register struct slist *s; 108117683Spst register int atom; 108217683Spst struct stmt *last[N_ATOMS]; 108317683Spst 108417683Spst memset((char *)last, 0, sizeof last); 108517683Spst 108617683Spst for (s = b->stmts; s != 0; s = s->next) 108717683Spst deadstmt(&s->s, last); 108817683Spst deadstmt(&b->s, last); 108917683Spst 109017683Spst for (atom = 0; atom < N_ATOMS; ++atom) 109117683Spst if (last[atom] && !ATOMELEM(b->out_use, atom)) { 109217683Spst last[atom]->code = NOP; 109317683Spst done = 0; 109417683Spst } 109517683Spst} 109617683Spst 109717683Spststatic void 109817683Spstopt_blk(b, do_stmts) 109917683Spst struct block *b; 110017683Spst int do_stmts; 110117683Spst{ 110217683Spst struct slist *s; 110317683Spst struct edge *p; 110417683Spst int i; 110517683Spst bpf_int32 aval; 110617683Spst 110756889Sfenner#if 0 110856889Sfenner for (s = b->stmts; s && s->next; s = s->next) 110956889Sfenner if 
(BPF_CLASS(s->s.code) == BPF_JMP) { 111056889Sfenner do_stmts = 0; 111156889Sfenner break; 111256889Sfenner } 111356889Sfenner#endif 111456889Sfenner 111517683Spst /* 111617683Spst * Initialize the atom values. 111717683Spst * If we have no predecessors, everything is undefined. 111817683Spst * Otherwise, we inherent our values from our predecessors. 111917683Spst * If any register has an ambiguous value (i.e. control paths are 112017683Spst * merging) give it the undefined value of 0. 112117683Spst */ 112217683Spst p = b->in_edges; 112317683Spst if (p == 0) 112417683Spst memset((char *)b->val, 0, sizeof(b->val)); 112517683Spst else { 112617683Spst memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 112717683Spst while ((p = p->next) != NULL) { 112817683Spst for (i = 0; i < N_ATOMS; ++i) 112917683Spst if (b->val[i] != p->pred->val[i]) 113017683Spst b->val[i] = 0; 113117683Spst } 113217683Spst } 113317683Spst aval = b->val[A_ATOM]; 113417683Spst for (s = b->stmts; s; s = s->next) 113517683Spst opt_stmt(&s->s, b->val, do_stmts); 113617683Spst 113717683Spst /* 113817683Spst * This is a special case: if we don't use anything from this 113917683Spst * block, and we load the accumulator with value that is 114017683Spst * already there, or if this block is a return, 114117683Spst * eliminate all the statements. 114217683Spst */ 1143127664Sbms if (do_stmts && 114417683Spst ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) || 114517683Spst BPF_CLASS(b->s.code) == BPF_RET)) { 114617683Spst if (b->stmts != 0) { 114717683Spst b->stmts = 0; 114817683Spst done = 0; 114917683Spst } 115017683Spst } else { 115117683Spst opt_peep(b); 115217683Spst opt_deadstores(b); 115317683Spst } 115417683Spst /* 115517683Spst * Set up values for branch optimizer. 
115617683Spst */ 115717683Spst if (BPF_SRC(b->s.code) == BPF_K) 115817683Spst b->oval = K(b->s.k); 115917683Spst else 116017683Spst b->oval = b->val[X_ATOM]; 116117683Spst b->et.code = b->s.code; 116217683Spst b->ef.code = -b->s.code; 116317683Spst} 116417683Spst 116517683Spst/* 116617683Spst * Return true if any register that is used on exit from 'succ', has 116717683Spst * an exit value that is different from the corresponding exit value 116817683Spst * from 'b'. 116917683Spst */ 117017683Spststatic int 117117683Spstuse_conflict(b, succ) 117217683Spst struct block *b, *succ; 117317683Spst{ 117417683Spst int atom; 117517683Spst atomset use = succ->out_use; 117617683Spst 117717683Spst if (use == 0) 117817683Spst return 0; 117917683Spst 118017683Spst for (atom = 0; atom < N_ATOMS; ++atom) 118117683Spst if (ATOMELEM(use, atom)) 118217683Spst if (b->val[atom] != succ->val[atom]) 118317683Spst return 1; 118417683Spst return 0; 118517683Spst} 118617683Spst 118717683Spststatic struct block * 118817683Spstfold_edge(child, ep) 118917683Spst struct block *child; 119017683Spst struct edge *ep; 119117683Spst{ 119217683Spst int sense; 119317683Spst int aval0, aval1, oval0, oval1; 119417683Spst int code = ep->code; 119517683Spst 119617683Spst if (code < 0) { 119717683Spst code = -code; 119817683Spst sense = 0; 119917683Spst } else 120017683Spst sense = 1; 120117683Spst 120217683Spst if (child->s.code != code) 120317683Spst return 0; 120417683Spst 120517683Spst aval0 = child->val[A_ATOM]; 120617683Spst oval0 = child->oval; 120717683Spst aval1 = ep->pred->val[A_ATOM]; 120817683Spst oval1 = ep->pred->oval; 120917683Spst 121017683Spst if (aval0 != aval1) 121117683Spst return 0; 121217683Spst 121317683Spst if (oval0 == oval1) 121417683Spst /* 121517683Spst * The operands are identical, so the 121617683Spst * result is true if a true branch was 121717683Spst * taken to get here, otherwise false. 121817683Spst */ 121917683Spst return sense ? 
JT(child) : JF(child); 122017683Spst 122117683Spst if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 122217683Spst /* 122317683Spst * At this point, we only know the comparison if we 122417683Spst * came down the true branch, and it was an equality 122517683Spst * comparison with a constant. We rely on the fact that 122617683Spst * distinct constants have distinct value numbers. 122717683Spst */ 122817683Spst return JF(child); 122917683Spst 123017683Spst return 0; 123117683Spst} 123217683Spst 123317683Spststatic void 123417683Spstopt_j(ep) 123517683Spst struct edge *ep; 123617683Spst{ 123717683Spst register int i, k; 123817683Spst register struct block *target; 123917683Spst 124017683Spst if (JT(ep->succ) == 0) 124117683Spst return; 124217683Spst 124317683Spst if (JT(ep->succ) == JF(ep->succ)) { 124417683Spst /* 124517683Spst * Common branch targets can be eliminated, provided 124617683Spst * there is no data dependency. 124717683Spst */ 124817683Spst if (!use_conflict(ep->pred, ep->succ->et.succ)) { 124917683Spst done = 0; 125017683Spst ep->succ = JT(ep->succ); 125117683Spst } 125217683Spst } 125317683Spst /* 125417683Spst * For each edge dominator that matches the successor of this 125517683Spst * edge, promote the edge successor to the its grandchild. 125617683Spst * 125717683Spst * XXX We violate the set abstraction here in favor a reasonably 125817683Spst * efficient loop. 125917683Spst */ 126017683Spst top: 126117683Spst for (i = 0; i < edgewords; ++i) { 126217683Spst register bpf_u_int32 x = ep->edom[i]; 126317683Spst 126417683Spst while (x != 0) { 126517683Spst k = ffs(x) - 1; 126617683Spst x &=~ (1 << k); 126717683Spst k += i * BITS_PER_WORD; 126817683Spst 126917683Spst target = fold_edge(ep->succ, edges[k]); 127017683Spst /* 127117683Spst * Check that there is no data dependency between 127217683Spst * nodes that will be violated if we move the edge. 
127317683Spst */ 127417683Spst if (target != 0 && !use_conflict(ep->pred, target)) { 127517683Spst done = 0; 127617683Spst ep->succ = target; 127717683Spst if (JT(target) != 0) 127817683Spst /* 127917683Spst * Start over unless we hit a leaf. 128017683Spst */ 128117683Spst goto top; 128217683Spst return; 128317683Spst } 128417683Spst } 128517683Spst } 128617683Spst} 128717683Spst 128817683Spst 128917683Spststatic void 129017683Spstor_pullup(b) 129117683Spst struct block *b; 129217683Spst{ 129317683Spst int val, at_top; 129417683Spst struct block *pull; 129517683Spst struct block **diffp, **samep; 129617683Spst struct edge *ep; 129717683Spst 129817683Spst ep = b->in_edges; 129917683Spst if (ep == 0) 130017683Spst return; 130117683Spst 130217683Spst /* 130317683Spst * Make sure each predecessor loads the same value. 130417683Spst * XXX why? 130517683Spst */ 130617683Spst val = ep->pred->val[A_ATOM]; 130717683Spst for (ep = ep->next; ep != 0; ep = ep->next) 130817683Spst if (val != ep->pred->val[A_ATOM]) 130917683Spst return; 131017683Spst 131117683Spst if (JT(b->in_edges->pred) == b) 131217683Spst diffp = &JT(b->in_edges->pred); 131317683Spst else 131417683Spst diffp = &JF(b->in_edges->pred); 131517683Spst 131617683Spst at_top = 1; 131717683Spst while (1) { 131817683Spst if (*diffp == 0) 131917683Spst return; 132017683Spst 132117683Spst if (JT(*diffp) != JT(b)) 132217683Spst return; 132317683Spst 132417683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 132517683Spst return; 132617683Spst 132717683Spst if ((*diffp)->val[A_ATOM] != val) 132817683Spst break; 132917683Spst 133017683Spst diffp = &JF(*diffp); 133117683Spst at_top = 0; 133217683Spst } 133317683Spst samep = &JF(*diffp); 133417683Spst while (1) { 133517683Spst if (*samep == 0) 133617683Spst return; 133717683Spst 133817683Spst if (JT(*samep) != JT(b)) 133917683Spst return; 134017683Spst 134117683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 134217683Spst return; 134317683Spst 134417683Spst if ((*samep)->val[A_ATOM] 
== val) 134517683Spst break; 134617683Spst 134717683Spst /* XXX Need to check that there are no data dependencies 134817683Spst between dp0 and dp1. Currently, the code generator 134917683Spst will not produce such dependencies. */ 135017683Spst samep = &JF(*samep); 135117683Spst } 135217683Spst#ifdef notdef 135317683Spst /* XXX This doesn't cover everything. */ 135417683Spst for (i = 0; i < N_ATOMS; ++i) 135517683Spst if ((*samep)->val[i] != pred->val[i]) 135617683Spst return; 135717683Spst#endif 135817683Spst /* Pull up the node. */ 135917683Spst pull = *samep; 136017683Spst *samep = JF(pull); 136117683Spst JF(pull) = *diffp; 136217683Spst 136317683Spst /* 136417683Spst * At the top of the chain, each predecessor needs to point at the 136517683Spst * pulled up node. Inside the chain, there is only one predecessor 136617683Spst * to worry about. 136717683Spst */ 136817683Spst if (at_top) { 136917683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 137017683Spst if (JT(ep->pred) == b) 137117683Spst JT(ep->pred) = pull; 137217683Spst else 137317683Spst JF(ep->pred) = pull; 137417683Spst } 137517683Spst } 137617683Spst else 137717683Spst *diffp = pull; 137817683Spst 137917683Spst done = 0; 138017683Spst} 138117683Spst 138217683Spststatic void 138317683Spstand_pullup(b) 138417683Spst struct block *b; 138517683Spst{ 138617683Spst int val, at_top; 138717683Spst struct block *pull; 138817683Spst struct block **diffp, **samep; 138917683Spst struct edge *ep; 139017683Spst 139117683Spst ep = b->in_edges; 139217683Spst if (ep == 0) 139317683Spst return; 139417683Spst 139517683Spst /* 139617683Spst * Make sure each predecessor loads the same value. 
139717683Spst */ 139817683Spst val = ep->pred->val[A_ATOM]; 139917683Spst for (ep = ep->next; ep != 0; ep = ep->next) 140017683Spst if (val != ep->pred->val[A_ATOM]) 140117683Spst return; 140217683Spst 140317683Spst if (JT(b->in_edges->pred) == b) 140417683Spst diffp = &JT(b->in_edges->pred); 140517683Spst else 140617683Spst diffp = &JF(b->in_edges->pred); 140717683Spst 140817683Spst at_top = 1; 140917683Spst while (1) { 141017683Spst if (*diffp == 0) 141117683Spst return; 141217683Spst 141317683Spst if (JF(*diffp) != JF(b)) 141417683Spst return; 141517683Spst 141617683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 141717683Spst return; 141817683Spst 141917683Spst if ((*diffp)->val[A_ATOM] != val) 142017683Spst break; 142117683Spst 142217683Spst diffp = &JT(*diffp); 142317683Spst at_top = 0; 142417683Spst } 142517683Spst samep = &JT(*diffp); 142617683Spst while (1) { 142717683Spst if (*samep == 0) 142817683Spst return; 142917683Spst 143017683Spst if (JF(*samep) != JF(b)) 143117683Spst return; 143217683Spst 143317683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 143417683Spst return; 143517683Spst 143617683Spst if ((*samep)->val[A_ATOM] == val) 143717683Spst break; 143817683Spst 143917683Spst /* XXX Need to check that there are no data dependencies 144017683Spst between diffp and samep. Currently, the code generator 144117683Spst will not produce such dependencies. */ 144217683Spst samep = &JT(*samep); 144317683Spst } 144417683Spst#ifdef notdef 144517683Spst /* XXX This doesn't cover everything. */ 144617683Spst for (i = 0; i < N_ATOMS; ++i) 144717683Spst if ((*samep)->val[i] != pred->val[i]) 144817683Spst return; 144917683Spst#endif 145017683Spst /* Pull up the node. */ 145117683Spst pull = *samep; 145217683Spst *samep = JT(pull); 145317683Spst JT(pull) = *diffp; 145417683Spst 145517683Spst /* 145617683Spst * At the top of the chain, each predecessor needs to point at the 145717683Spst * pulled up node. 
Inside the chain, there is only one predecessor 145817683Spst * to worry about. 145917683Spst */ 146017683Spst if (at_top) { 146117683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 146217683Spst if (JT(ep->pred) == b) 146317683Spst JT(ep->pred) = pull; 146417683Spst else 146517683Spst JF(ep->pred) = pull; 146617683Spst } 146717683Spst } 146817683Spst else 146917683Spst *diffp = pull; 147017683Spst 147117683Spst done = 0; 147217683Spst} 147317683Spst 147417683Spststatic void 147517683Spstopt_blks(root, do_stmts) 147617683Spst struct block *root; 147717683Spst int do_stmts; 147817683Spst{ 147917683Spst int i, maxlevel; 148017683Spst struct block *p; 148117683Spst 148217683Spst init_val(); 148317683Spst maxlevel = root->level; 148475107Sfenner 148575107Sfenner find_inedges(root); 148617683Spst for (i = maxlevel; i >= 0; --i) 148717683Spst for (p = levels[i]; p; p = p->link) 148817683Spst opt_blk(p, do_stmts); 148917683Spst 149017683Spst if (do_stmts) 149117683Spst /* 149217683Spst * No point trying to move branches; it can't possibly 149317683Spst * make a difference at this point. 
149417683Spst */ 149517683Spst return; 149617683Spst 149717683Spst for (i = 1; i <= maxlevel; ++i) { 149817683Spst for (p = levels[i]; p; p = p->link) { 149917683Spst opt_j(&p->et); 150017683Spst opt_j(&p->ef); 150117683Spst } 150217683Spst } 150375107Sfenner 150475107Sfenner find_inedges(root); 150517683Spst for (i = 1; i <= maxlevel; ++i) { 150617683Spst for (p = levels[i]; p; p = p->link) { 150717683Spst or_pullup(p); 150817683Spst and_pullup(p); 150917683Spst } 151017683Spst } 151117683Spst} 151217683Spst 151317683Spststatic inline void 151417683Spstlink_inedge(parent, child) 151517683Spst struct edge *parent; 151617683Spst struct block *child; 151717683Spst{ 151817683Spst parent->next = child->in_edges; 151917683Spst child->in_edges = parent; 152017683Spst} 152117683Spst 152217683Spststatic void 152317683Spstfind_inedges(root) 152417683Spst struct block *root; 152517683Spst{ 152617683Spst int i; 152717683Spst struct block *b; 152817683Spst 152917683Spst for (i = 0; i < n_blocks; ++i) 153017683Spst blocks[i]->in_edges = 0; 153117683Spst 153217683Spst /* 153317683Spst * Traverse the graph, adding each edge to the predecessor 153417683Spst * list of its successors. Skip the leaves (i.e. level 0). 
153517683Spst */ 153617683Spst for (i = root->level; i > 0; --i) { 153717683Spst for (b = levels[i]; b != 0; b = b->link) { 153817683Spst link_inedge(&b->et, JT(b)); 153917683Spst link_inedge(&b->ef, JF(b)); 154017683Spst } 154117683Spst } 154217683Spst} 154317683Spst 154417683Spststatic void 154517683Spstopt_root(b) 154617683Spst struct block **b; 154717683Spst{ 154817683Spst struct slist *tmp, *s; 154917683Spst 155017683Spst s = (*b)->stmts; 155117683Spst (*b)->stmts = 0; 155217683Spst while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 155317683Spst *b = JT(*b); 155417683Spst 155517683Spst tmp = (*b)->stmts; 155617683Spst if (tmp != 0) 155717683Spst sappend(s, tmp); 155817683Spst (*b)->stmts = s; 155917683Spst 156017683Spst /* 156117683Spst * If the root node is a return, then there is no 156217683Spst * point executing any statements (since the bpf machine 156317683Spst * has no side effects). 156417683Spst */ 156517683Spst if (BPF_CLASS((*b)->s.code) == BPF_RET) 156617683Spst (*b)->stmts = 0; 156717683Spst} 156817683Spst 156917683Spststatic void 157017683Spstopt_loop(root, do_stmts) 157117683Spst struct block *root; 157217683Spst int do_stmts; 157317683Spst{ 157417683Spst 157517683Spst#ifdef BDEBUG 157698530Sfenner if (dflag > 1) { 157798530Sfenner printf("opt_loop(root, %d) begin\n", do_stmts); 157817683Spst opt_dump(root); 157998530Sfenner } 158017683Spst#endif 158117683Spst do { 158217683Spst done = 1; 158317683Spst find_levels(root); 158417683Spst find_dom(root); 158517683Spst find_closure(root); 158617683Spst find_ud(root); 158717683Spst find_edom(root); 158817683Spst opt_blks(root, do_stmts); 158917683Spst#ifdef BDEBUG 159098530Sfenner if (dflag > 1) { 159198530Sfenner printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 159217683Spst opt_dump(root); 159398530Sfenner } 159417683Spst#endif 159517683Spst } while (!done); 159617683Spst} 159717683Spst 159817683Spst/* 159917683Spst * Optimize the filter code in its dag representation. 
160017683Spst */ 160117683Spstvoid 160217683Spstbpf_optimize(rootp) 160317683Spst struct block **rootp; 160417683Spst{ 160517683Spst struct block *root; 160617683Spst 160717683Spst root = *rootp; 160817683Spst 160917683Spst opt_init(root); 161017683Spst opt_loop(root, 0); 161117683Spst opt_loop(root, 1); 161217683Spst intern_blocks(root); 161398530Sfenner#ifdef BDEBUG 161498530Sfenner if (dflag > 1) { 161598530Sfenner printf("after intern_blocks()\n"); 161698530Sfenner opt_dump(root); 161798530Sfenner } 161898530Sfenner#endif 161917683Spst opt_root(rootp); 162098530Sfenner#ifdef BDEBUG 162198530Sfenner if (dflag > 1) { 162298530Sfenner printf("after opt_root()\n"); 162398530Sfenner opt_dump(root); 162498530Sfenner } 162598530Sfenner#endif 162617683Spst opt_cleanup(); 162717683Spst} 162817683Spst 162917683Spststatic void 163017683Spstmake_marks(p) 163117683Spst struct block *p; 163217683Spst{ 163317683Spst if (!isMarked(p)) { 163417683Spst Mark(p); 163517683Spst if (BPF_CLASS(p->s.code) != BPF_RET) { 163617683Spst make_marks(JT(p)); 163717683Spst make_marks(JF(p)); 163817683Spst } 163917683Spst } 164017683Spst} 164117683Spst 164217683Spst/* 164317683Spst * Mark code array such that isMarked(i) is true 164417683Spst * only for nodes that are alive. 164517683Spst */ 164617683Spststatic void 164717683Spstmark_code(p) 164817683Spst struct block *p; 164917683Spst{ 165017683Spst cur_mark += 1; 165117683Spst make_marks(p); 165217683Spst} 165317683Spst 165417683Spst/* 165517683Spst * True iff the two stmt lists load the same value from the packet into 165617683Spst * the accumulator. 
165717683Spst */ 165817683Spststatic int 165917683Spsteq_slist(x, y) 166017683Spst struct slist *x, *y; 166117683Spst{ 166217683Spst while (1) { 166317683Spst while (x && x->s.code == NOP) 166417683Spst x = x->next; 166517683Spst while (y && y->s.code == NOP) 166617683Spst y = y->next; 166717683Spst if (x == 0) 166817683Spst return y == 0; 166917683Spst if (y == 0) 167017683Spst return x == 0; 167117683Spst if (x->s.code != y->s.code || x->s.k != y->s.k) 167217683Spst return 0; 167317683Spst x = x->next; 167417683Spst y = y->next; 167517683Spst } 167617683Spst} 167717683Spst 167817683Spststatic inline int 167917683Spsteq_blk(b0, b1) 168017683Spst struct block *b0, *b1; 168117683Spst{ 168217683Spst if (b0->s.code == b1->s.code && 168317683Spst b0->s.k == b1->s.k && 168417683Spst b0->et.succ == b1->et.succ && 168517683Spst b0->ef.succ == b1->ef.succ) 168617683Spst return eq_slist(b0->stmts, b1->stmts); 168717683Spst return 0; 168817683Spst} 168917683Spst 169017683Spststatic void 169117683Spstintern_blocks(root) 169217683Spst struct block *root; 169317683Spst{ 169417683Spst struct block *p; 169517683Spst int i, j; 169617683Spst int done; 169717683Spst top: 169817683Spst done = 1; 169917683Spst for (i = 0; i < n_blocks; ++i) 170017683Spst blocks[i]->link = 0; 170117683Spst 170217683Spst mark_code(root); 170317683Spst 170417683Spst for (i = n_blocks - 1; --i >= 0; ) { 170517683Spst if (!isMarked(blocks[i])) 170617683Spst continue; 170717683Spst for (j = i + 1; j < n_blocks; ++j) { 170817683Spst if (!isMarked(blocks[j])) 170917683Spst continue; 171017683Spst if (eq_blk(blocks[i], blocks[j])) { 171117683Spst blocks[i]->link = blocks[j]->link ? 
171217683Spst blocks[j]->link : blocks[j]; 171317683Spst break; 171417683Spst } 171517683Spst } 171617683Spst } 171717683Spst for (i = 0; i < n_blocks; ++i) { 171817683Spst p = blocks[i]; 171917683Spst if (JT(p) == 0) 172017683Spst continue; 172117683Spst if (JT(p)->link) { 172217683Spst done = 0; 172317683Spst JT(p) = JT(p)->link; 172417683Spst } 172517683Spst if (JF(p)->link) { 172617683Spst done = 0; 172717683Spst JF(p) = JF(p)->link; 172817683Spst } 172917683Spst } 173017683Spst if (!done) 173117683Spst goto top; 173217683Spst} 173317683Spst 173417683Spststatic void 173517683Spstopt_cleanup() 173617683Spst{ 173717683Spst free((void *)vnode_base); 173817683Spst free((void *)vmap); 173917683Spst free((void *)edges); 174017683Spst free((void *)space); 174117683Spst free((void *)levels); 174217683Spst free((void *)blocks); 174317683Spst} 174417683Spst 174517683Spst/* 174617683Spst * Return the number of stmts in 's'. 174717683Spst */ 174817683Spststatic int 174917683Spstslength(s) 175017683Spst struct slist *s; 175117683Spst{ 175217683Spst int n = 0; 175317683Spst 175417683Spst for (; s; s = s->next) 175517683Spst if (s->s.code != NOP) 175617683Spst ++n; 175717683Spst return n; 175817683Spst} 175917683Spst 176017683Spst/* 176117683Spst * Return the number of nodes reachable by 'p'. 176217683Spst * All nodes should be initially unmarked. 
176317683Spst */ 176417683Spststatic int 176517683Spstcount_blocks(p) 176617683Spst struct block *p; 176717683Spst{ 176817683Spst if (p == 0 || isMarked(p)) 176917683Spst return 0; 177017683Spst Mark(p); 177117683Spst return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 177217683Spst} 177317683Spst 177417683Spst/* 177517683Spst * Do a depth first search on the flow graph, numbering the 177617683Spst * the basic blocks, and entering them into the 'blocks' array.` 177717683Spst */ 177817683Spststatic void 177917683Spstnumber_blks_r(p) 178017683Spst struct block *p; 178117683Spst{ 178217683Spst int n; 178317683Spst 178417683Spst if (p == 0 || isMarked(p)) 178517683Spst return; 178617683Spst 178717683Spst Mark(p); 178817683Spst n = n_blocks++; 178917683Spst p->id = n; 179017683Spst blocks[n] = p; 179117683Spst 179217683Spst number_blks_r(JT(p)); 179317683Spst number_blks_r(JF(p)); 179417683Spst} 179517683Spst 179617683Spst/* 179717683Spst * Return the number of stmts in the flowgraph reachable by 'p'. 179817683Spst * The nodes should be unmarked before calling. 179975107Sfenner * 180075107Sfenner * Note that "stmts" means "instructions", and that this includes 180175107Sfenner * 180275107Sfenner * side-effect statements in 'p' (slength(p->stmts)); 180375107Sfenner * 180475107Sfenner * statements in the true branch from 'p' (count_stmts(JT(p))); 180575107Sfenner * 180675107Sfenner * statements in the false branch from 'p' (count_stmts(JF(p))); 180775107Sfenner * 180875107Sfenner * the conditional jump itself (1); 180975107Sfenner * 181075107Sfenner * an extra long jump if the true branch requires it (p->longjt); 181175107Sfenner * 181275107Sfenner * an extra long jump if the false branch requires it (p->longjf). 
181317683Spst */ 181417683Spststatic int 181517683Spstcount_stmts(p) 181617683Spst struct block *p; 181717683Spst{ 181817683Spst int n; 181917683Spst 182017683Spst if (p == 0 || isMarked(p)) 182117683Spst return 0; 182217683Spst Mark(p); 182317683Spst n = count_stmts(JT(p)) + count_stmts(JF(p)); 182475107Sfenner return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 182517683Spst} 182617683Spst 182717683Spst/* 182817683Spst * Allocate memory. All allocation is done before optimization 182917683Spst * is begun. A linear bound on the size of all data structures is computed 183017683Spst * from the total number of blocks and/or statements. 183117683Spst */ 183217683Spststatic void 183317683Spstopt_init(root) 183417683Spst struct block *root; 183517683Spst{ 183617683Spst bpf_u_int32 *p; 183717683Spst int i, n, max_stmts; 183817683Spst 183917683Spst /* 184017683Spst * First, count the blocks, so we can malloc an array to map 184117683Spst * block number to block. Then, put the blocks into the array. 184217683Spst */ 184317683Spst unMarkAll(); 184417683Spst n = count_blocks(root); 184517683Spst blocks = (struct block **)malloc(n * sizeof(*blocks)); 1846127664Sbms if (blocks == NULL) 1847127664Sbms bpf_error("malloc"); 184817683Spst unMarkAll(); 184917683Spst n_blocks = 0; 185017683Spst number_blks_r(root); 185117683Spst 185217683Spst n_edges = 2 * n_blocks; 185317683Spst edges = (struct edge **)malloc(n_edges * sizeof(*edges)); 1854127664Sbms if (edges == NULL) 1855127664Sbms bpf_error("malloc"); 185617683Spst 185717683Spst /* 185817683Spst * The number of levels is bounded by the number of nodes. 
185917683Spst */ 186017683Spst levels = (struct block **)malloc(n_blocks * sizeof(*levels)); 1861127664Sbms if (levels == NULL) 1862127664Sbms bpf_error("malloc"); 186317683Spst 186417683Spst edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 186517683Spst nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 186617683Spst 186717683Spst /* XXX */ 186817683Spst space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 186917683Spst + n_edges * edgewords * sizeof(*space)); 1870127664Sbms if (space == NULL) 1871127664Sbms bpf_error("malloc"); 187217683Spst p = space; 187317683Spst all_dom_sets = p; 187417683Spst for (i = 0; i < n; ++i) { 187517683Spst blocks[i]->dom = p; 187617683Spst p += nodewords; 187717683Spst } 187817683Spst all_closure_sets = p; 187917683Spst for (i = 0; i < n; ++i) { 188017683Spst blocks[i]->closure = p; 188117683Spst p += nodewords; 188217683Spst } 188317683Spst all_edge_sets = p; 188417683Spst for (i = 0; i < n; ++i) { 188517683Spst register struct block *b = blocks[i]; 188617683Spst 188717683Spst b->et.edom = p; 188817683Spst p += edgewords; 188917683Spst b->ef.edom = p; 189017683Spst p += edgewords; 189117683Spst b->et.id = i; 189217683Spst edges[i] = &b->et; 189317683Spst b->ef.id = n_blocks + i; 189417683Spst edges[n_blocks + i] = &b->ef; 189517683Spst b->et.pred = b; 189617683Spst b->ef.pred = b; 189717683Spst } 189817683Spst max_stmts = 0; 189917683Spst for (i = 0; i < n; ++i) 190017683Spst max_stmts += slength(blocks[i]->stmts) + 1; 190117683Spst /* 190217683Spst * We allocate at most 3 value numbers per statement, 190317683Spst * so this is an upper bound on the number of valnodes 190417683Spst * we'll need. 
190517683Spst */ 190617683Spst maxval = 3 * max_stmts; 190717683Spst vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap)); 190875107Sfenner vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base)); 1909127664Sbms if (vmap == NULL || vnode_base == NULL) 1910127664Sbms bpf_error("malloc"); 191117683Spst} 191217683Spst 191317683Spst/* 191417683Spst * Some pointers used to convert the basic block form of the code, 191517683Spst * into the array form that BPF requires. 'fstart' will point to 191617683Spst * the malloc'd array while 'ftail' is used during the recursive traversal. 191717683Spst */ 191817683Spststatic struct bpf_insn *fstart; 191917683Spststatic struct bpf_insn *ftail; 192017683Spst 192117683Spst#ifdef BDEBUG 192217683Spstint bids[1000]; 192317683Spst#endif 192417683Spst 192517683Spst/* 192617683Spst * Returns true if successful. Returns false if a branch has 192717683Spst * an offset that is too large. If so, we have marked that 192817683Spst * branch so that on a subsequent iteration, it will be treated 192917683Spst * properly. 
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	/* Null child or already-emitted block: nothing to do. */
	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	/*
	 * Emit both successors first: code is laid out back-to-front
	 * from 'ftail', so converting children before this block places
	 * them after it in the final instruction array.
	 */
	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	/*
	 * Reserve space for this block's statements, its terminating
	 * branch, and any long-jump instructions a previous failed pass
	 * marked as needed (longjt/longjf).
	 */
	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/*
	 * generate offset[] for convenience: offset[i] is the i'th
	 * slist node of this block, so block-local relative jump
	 * targets can be resolved below by linear search.
	 */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		/*
		 * NOTE(review): the next-to-last statement slot is
		 * exempted from jump resolution here; the original
		 * author flagged it as questionable too — confirm
		 * against how gencode emits block-local jumps.
		 */
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		/* A conditional jump must have both targets set. */
		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Find each target in offset[] and encode it as a
		 * relative displacement from the following insn.
		 * Exactly one match per target is required.
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	/* Emit this block's terminating branch (or return). */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		/*
		 * The jt/jf fields of a BPF conditional hold only small
		 * (8-bit) displacements; a target 256 or more insns away
		 * must go through an inserted unconditional BPF_JA,
		 * whose k field is wide enough.
		 */
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 *
	 * A failed pass marks the offending branches (longjt/longjf)
	 * and returns 0; we then free the buffer and recount, on the
	 * assumption that count_stmts() includes the extra jump
	 * instructions those marks require — NOTE(review): confirm
	 * against count_stmts(), which is not visible here.
	 */
	while (1) {
	    unMarkAll();
	    n = *lenp = count_stmts(root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    if (fp == NULL)
		    bpf_error("malloc");
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    fstart = fp;
	    ftail = fp + n;

	    /* Second unmark: convert_code_r() reuses the mark bits. */
	    unMarkAll();
	    if (convert_code_r(root))
		break;
	    free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	/*
	 * NOTE(review): bf_len * sizeof(insn) is not checked for
	 * overflow; callers are presumably trusted to pass a sane
	 * program — verify for externally supplied filters.
	 */
	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			 "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}

#ifdef BDEBUG
/*
 * Debug helper: flatten the flowgraph and dump the resulting BPF
 * program (with block ids recorded in bids[]) to stdout.
 */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif