/* optimize.c — libpcap BPF optimizer (revision 17683; blame prefixes stripped) */
117683Spst/* 217683Spst * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 317683Spst * The Regents of the University of California. All rights reserved. 417683Spst * 517683Spst * Redistribution and use in source and binary forms, with or without 617683Spst * modification, are permitted provided that: (1) source code distributions 717683Spst * retain the above copyright notice and this paragraph in its entirety, (2) 817683Spst * distributions including binary code include the above copyright notice and 917683Spst * this paragraph in its entirety in the documentation or other materials 1017683Spst * provided with the distribution, and (3) all advertising materials mentioning 1117683Spst * features or use of this software display the following acknowledgement: 1217683Spst * ``This product includes software developed by the University of California, 1317683Spst * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 1417683Spst * the University nor the names of its contributors may be used to endorse 1517683Spst * or promote products derived from this software without specific prior 1617683Spst * written permission. 1717683Spst * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 1817683Spst * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 1917683Spst * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 2017683Spst * 2117683Spst * Optimization module for tcpdump intermediate representation. 
2217683Spst */ 2317683Spst#ifndef lint 2417683Spststatic char rcsid[] = 2517683Spst "@(#) $Header: optimize.c,v 1.59 96/07/15 00:48:49 leres Exp $ (LBL)"; 2617683Spst#endif 2717683Spst 2817683Spst#include <sys/types.h> 2917683Spst#include <sys/time.h> 3017683Spst 3117683Spst#include <stdio.h> 3217683Spst#include <stdlib.h> 3317683Spst#include <memory.h> 3417683Spst 3517683Spst#include "pcap-int.h" 3617683Spst 3717683Spst#include "gencode.h" 3817683Spst 3917683Spst#include "gnuc.h" 4017683Spst#ifdef HAVE_OS_PROTO_H 4117683Spst#include "os-proto.h" 4217683Spst#endif 4317683Spst 4417683Spst#ifdef BDEBUG 4517683Spstextern int dflag; 4617683Spst#endif 4717683Spst 4817683Spst#define A_ATOM BPF_MEMWORDS 4917683Spst#define X_ATOM (BPF_MEMWORDS+1) 5017683Spst 5117683Spst#define NOP -1 5217683Spst 5317683Spst/* 5417683Spst * This define is used to represent *both* the accumulator and 5517683Spst * x register in use-def computations. 5617683Spst * Currently, the use-def code assumes only one definition per instruction. 5717683Spst */ 5817683Spst#define AX_ATOM N_ATOMS 5917683Spst 6017683Spst/* 6117683Spst * A flag to indicate that further optimization is needed. 6217683Spst * Iterative passes are continued until a given pass yields no 6317683Spst * branch movement. 6417683Spst */ 6517683Spststatic int done; 6617683Spst 6717683Spst/* 6817683Spst * A block is marked if only if its mark equals the current mark. 6917683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is 7017683Spst * incremented. This automatically makes each element unmarked. 
7117683Spst */ 7217683Spststatic int cur_mark; 7317683Spst#define isMarked(p) ((p)->mark == cur_mark) 7417683Spst#define unMarkAll() cur_mark += 1 7517683Spst#define Mark(p) ((p)->mark = cur_mark) 7617683Spst 7717683Spststatic void opt_init(struct block *); 7817683Spststatic void opt_cleanup(void); 7917683Spst 8017683Spststatic void make_marks(struct block *); 8117683Spststatic void mark_code(struct block *); 8217683Spst 8317683Spststatic void intern_blocks(struct block *); 8417683Spst 8517683Spststatic int eq_slist(struct slist *, struct slist *); 8617683Spst 8717683Spststatic void find_levels_r(struct block *); 8817683Spst 8917683Spststatic void find_levels(struct block *); 9017683Spststatic void find_dom(struct block *); 9117683Spststatic void propedom(struct edge *); 9217683Spststatic void find_edom(struct block *); 9317683Spststatic void find_closure(struct block *); 9417683Spststatic int atomuse(struct stmt *); 9517683Spststatic int atomdef(struct stmt *); 9617683Spststatic void compute_local_ud(struct block *); 9717683Spststatic void find_ud(struct block *); 9817683Spststatic void init_val(void); 9917683Spststatic int F(int, int, int); 10017683Spststatic inline void vstore(struct stmt *, int *, int, int); 10117683Spststatic void opt_blk(struct block *, int); 10217683Spststatic int use_conflict(struct block *, struct block *); 10317683Spststatic void opt_j(struct edge *); 10417683Spststatic void or_pullup(struct block *); 10517683Spststatic void and_pullup(struct block *); 10617683Spststatic void opt_blks(struct block *, int); 10717683Spststatic inline void link_inedge(struct edge *, struct block *); 10817683Spststatic void find_inedges(struct block *); 10917683Spststatic void opt_root(struct block **); 11017683Spststatic void opt_loop(struct block *, int); 11117683Spststatic void fold_op(struct stmt *, int, int); 11217683Spststatic inline struct slist *this_op(struct slist *); 11317683Spststatic void opt_not(struct block *); 11417683Spststatic void 
opt_peep(struct block *); 11517683Spststatic void opt_stmt(struct stmt *, int[], int); 11617683Spststatic void deadstmt(struct stmt *, struct stmt *[]); 11717683Spststatic void opt_deadstores(struct block *); 11817683Spststatic void opt_blk(struct block *, int); 11917683Spststatic int use_conflict(struct block *, struct block *); 12017683Spststatic void opt_j(struct edge *); 12117683Spststatic struct block *fold_edge(struct block *, struct edge *); 12217683Spststatic inline int eq_blk(struct block *, struct block *); 12317683Spststatic int slength(struct slist *); 12417683Spststatic int count_blocks(struct block *); 12517683Spststatic void number_blks_r(struct block *); 12617683Spststatic int count_stmts(struct block *); 12717683Spststatic int convert_code_r(struct block *); 12817683Spst#ifdef BDEBUG 12917683Spststatic void opt_dump(struct block *); 13017683Spst#endif 13117683Spst 13217683Spststatic int n_blocks; 13317683Spststruct block **blocks; 13417683Spststatic int n_edges; 13517683Spststruct edge **edges; 13617683Spst 13717683Spst/* 13817683Spst * A bit vector set representation of the dominators. 13917683Spst * We round up the set size to the next power of two. 14017683Spst */ 14117683Spststatic int nodewords; 14217683Spststatic int edgewords; 14317683Spststruct block **levels; 14417683Spstbpf_u_int32 *space; 14517683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 14617683Spst/* 14717683Spst * True if a is in uset {p} 14817683Spst */ 14917683Spst#define SET_MEMBER(p, a) \ 15017683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 15117683Spst 15217683Spst/* 15317683Spst * Add 'a' to uset p. 15417683Spst */ 15517683Spst#define SET_INSERT(p, a) \ 15617683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 15717683Spst 15817683Spst/* 15917683Spst * Delete 'a' from uset p. 
16017683Spst */ 16117683Spst#define SET_DELETE(p, a) \ 16217683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 16317683Spst 16417683Spst/* 16517683Spst * a := a intersect b 16617683Spst */ 16717683Spst#define SET_INTERSECT(a, b, n)\ 16817683Spst{\ 16917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 17017683Spst register int _n = n;\ 17117683Spst while (--_n >= 0) *_x++ &= *_y++;\ 17217683Spst} 17317683Spst 17417683Spst/* 17517683Spst * a := a - b 17617683Spst */ 17717683Spst#define SET_SUBTRACT(a, b, n)\ 17817683Spst{\ 17917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 18017683Spst register int _n = n;\ 18117683Spst while (--_n >= 0) *_x++ &=~ *_y++;\ 18217683Spst} 18317683Spst 18417683Spst/* 18517683Spst * a := a union b 18617683Spst */ 18717683Spst#define SET_UNION(a, b, n)\ 18817683Spst{\ 18917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 19017683Spst register int _n = n;\ 19117683Spst while (--_n >= 0) *_x++ |= *_y++;\ 19217683Spst} 19317683Spst 19417683Spststatic uset all_dom_sets; 19517683Spststatic uset all_closure_sets; 19617683Spststatic uset all_edge_sets; 19717683Spst 19817683Spst#ifndef MAX 19917683Spst#define MAX(a,b) ((a)>(b)?(a):(b)) 20017683Spst#endif 20117683Spst 20217683Spststatic void 20317683Spstfind_levels_r(b) 20417683Spst struct block *b; 20517683Spst{ 20617683Spst int level; 20717683Spst 20817683Spst if (isMarked(b)) 20917683Spst return; 21017683Spst 21117683Spst Mark(b); 21217683Spst b->link = 0; 21317683Spst 21417683Spst if (JT(b)) { 21517683Spst find_levels_r(JT(b)); 21617683Spst find_levels_r(JF(b)); 21717683Spst level = MAX(JT(b)->level, JF(b)->level) + 1; 21817683Spst } else 21917683Spst level = 0; 22017683Spst b->level = level; 22117683Spst b->link = levels[level]; 22217683Spst levels[level] = b; 22317683Spst} 22417683Spst 22517683Spst/* 22617683Spst * Level graph. The levels go from 0 at the leaves to 22717683Spst * N_LEVELS at the root. 
The levels[] array points to the 22817683Spst * first node of the level list, whose elements are linked 22917683Spst * with the 'link' field of the struct block. 23017683Spst */ 23117683Spststatic void 23217683Spstfind_levels(root) 23317683Spst struct block *root; 23417683Spst{ 23517683Spst memset((char *)levels, 0, n_blocks * sizeof(*levels)); 23617683Spst unMarkAll(); 23717683Spst find_levels_r(root); 23817683Spst} 23917683Spst 24017683Spst/* 24117683Spst * Find dominator relationships. 24217683Spst * Assumes graph has been leveled. 24317683Spst */ 24417683Spststatic void 24517683Spstfind_dom(root) 24617683Spst struct block *root; 24717683Spst{ 24817683Spst int i; 24917683Spst struct block *b; 25017683Spst bpf_u_int32 *x; 25117683Spst 25217683Spst /* 25317683Spst * Initialize sets to contain all nodes. 25417683Spst */ 25517683Spst x = all_dom_sets; 25617683Spst i = n_blocks * nodewords; 25717683Spst while (--i >= 0) 25817683Spst *x++ = ~0; 25917683Spst /* Root starts off empty. */ 26017683Spst for (i = nodewords; --i >= 0;) 26117683Spst root->dom[i] = 0; 26217683Spst 26317683Spst /* root->level is the highest level no found. */ 26417683Spst for (i = root->level; i >= 0; --i) { 26517683Spst for (b = levels[i]; b; b = b->link) { 26617683Spst SET_INSERT(b->dom, b->id); 26717683Spst if (JT(b) == 0) 26817683Spst continue; 26917683Spst SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 27017683Spst SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 27117683Spst } 27217683Spst } 27317683Spst} 27417683Spst 27517683Spststatic void 27617683Spstpropedom(ep) 27717683Spst struct edge *ep; 27817683Spst{ 27917683Spst SET_INSERT(ep->edom, ep->id); 28017683Spst if (ep->succ) { 28117683Spst SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 28217683Spst SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 28317683Spst } 28417683Spst} 28517683Spst 28617683Spst/* 28717683Spst * Compute edge dominators. 28817683Spst * Assumes graph has been leveled and predecessors established. 
28917683Spst */ 29017683Spststatic void 29117683Spstfind_edom(root) 29217683Spst struct block *root; 29317683Spst{ 29417683Spst int i; 29517683Spst uset x; 29617683Spst struct block *b; 29717683Spst 29817683Spst x = all_edge_sets; 29917683Spst for (i = n_edges * edgewords; --i >= 0; ) 30017683Spst x[i] = ~0; 30117683Spst 30217683Spst /* root->level is the highest level no found. */ 30317683Spst memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 30417683Spst memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 30517683Spst for (i = root->level; i >= 0; --i) { 30617683Spst for (b = levels[i]; b != 0; b = b->link) { 30717683Spst propedom(&b->et); 30817683Spst propedom(&b->ef); 30917683Spst } 31017683Spst } 31117683Spst} 31217683Spst 31317683Spst/* 31417683Spst * Find the backwards transitive closure of the flow graph. These sets 31517683Spst * are backwards in the sense that we find the set of nodes that reach 31617683Spst * a given node, not the set of nodes that can be reached by a node. 31717683Spst * 31817683Spst * Assumes graph has been leveled. 31917683Spst */ 32017683Spststatic void 32117683Spstfind_closure(root) 32217683Spst struct block *root; 32317683Spst{ 32417683Spst int i; 32517683Spst struct block *b; 32617683Spst 32717683Spst /* 32817683Spst * Initialize sets to contain no nodes. 32917683Spst */ 33017683Spst memset((char *)all_closure_sets, 0, 33117683Spst n_blocks * nodewords * sizeof(*all_closure_sets)); 33217683Spst 33317683Spst /* root->level is the highest level no found. */ 33417683Spst for (i = root->level; i >= 0; --i) { 33517683Spst for (b = levels[i]; b; b = b->link) { 33617683Spst SET_INSERT(b->closure, b->id); 33717683Spst if (JT(b) == 0) 33817683Spst continue; 33917683Spst SET_UNION(JT(b)->closure, b->closure, nodewords); 34017683Spst SET_UNION(JF(b)->closure, b->closure, nodewords); 34117683Spst } 34217683Spst } 34317683Spst} 34417683Spst 34517683Spst/* 34617683Spst * Return the register number that is used by s. 
If A and X are both 34717683Spst * used, return AX_ATOM. If no register is used, return -1. 34817683Spst * 34917683Spst * The implementation should probably change to an array access. 35017683Spst */ 35117683Spststatic int 35217683Spstatomuse(s) 35317683Spst struct stmt *s; 35417683Spst{ 35517683Spst register int c = s->code; 35617683Spst 35717683Spst if (c == NOP) 35817683Spst return -1; 35917683Spst 36017683Spst switch (BPF_CLASS(c)) { 36117683Spst 36217683Spst case BPF_RET: 36317683Spst return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 36417683Spst (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 36517683Spst 36617683Spst case BPF_LD: 36717683Spst case BPF_LDX: 36817683Spst return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 36917683Spst (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 37017683Spst 37117683Spst case BPF_ST: 37217683Spst return A_ATOM; 37317683Spst 37417683Spst case BPF_STX: 37517683Spst return X_ATOM; 37617683Spst 37717683Spst case BPF_JMP: 37817683Spst case BPF_ALU: 37917683Spst if (BPF_SRC(c) == BPF_X) 38017683Spst return AX_ATOM; 38117683Spst return A_ATOM; 38217683Spst 38317683Spst case BPF_MISC: 38417683Spst return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 38517683Spst } 38617683Spst abort(); 38717683Spst /* NOTREACHED */ 38817683Spst} 38917683Spst 39017683Spst/* 39117683Spst * Return the register number that is defined by 's'. We assume that 39217683Spst * a single stmt cannot define more than one register. If no register 39317683Spst * is defined, return -1. 39417683Spst * 39517683Spst * The implementation should probably change to an array access. 
39617683Spst */ 39717683Spststatic int 39817683Spstatomdef(s) 39917683Spst struct stmt *s; 40017683Spst{ 40117683Spst if (s->code == NOP) 40217683Spst return -1; 40317683Spst 40417683Spst switch (BPF_CLASS(s->code)) { 40517683Spst 40617683Spst case BPF_LD: 40717683Spst case BPF_ALU: 40817683Spst return A_ATOM; 40917683Spst 41017683Spst case BPF_LDX: 41117683Spst return X_ATOM; 41217683Spst 41317683Spst case BPF_ST: 41417683Spst case BPF_STX: 41517683Spst return s->k; 41617683Spst 41717683Spst case BPF_MISC: 41817683Spst return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 41917683Spst } 42017683Spst return -1; 42117683Spst} 42217683Spst 42317683Spststatic void 42417683Spstcompute_local_ud(b) 42517683Spst struct block *b; 42617683Spst{ 42717683Spst struct slist *s; 42817683Spst atomset def = 0, use = 0, kill = 0; 42917683Spst int atom; 43017683Spst 43117683Spst for (s = b->stmts; s; s = s->next) { 43217683Spst if (s->s.code == NOP) 43317683Spst continue; 43417683Spst atom = atomuse(&s->s); 43517683Spst if (atom >= 0) { 43617683Spst if (atom == AX_ATOM) { 43717683Spst if (!ATOMELEM(def, X_ATOM)) 43817683Spst use |= ATOMMASK(X_ATOM); 43917683Spst if (!ATOMELEM(def, A_ATOM)) 44017683Spst use |= ATOMMASK(A_ATOM); 44117683Spst } 44217683Spst else if (atom < N_ATOMS) { 44317683Spst if (!ATOMELEM(def, atom)) 44417683Spst use |= ATOMMASK(atom); 44517683Spst } 44617683Spst else 44717683Spst abort(); 44817683Spst } 44917683Spst atom = atomdef(&s->s); 45017683Spst if (atom >= 0) { 45117683Spst if (!ATOMELEM(use, atom)) 45217683Spst kill |= ATOMMASK(atom); 45317683Spst def |= ATOMMASK(atom); 45417683Spst } 45517683Spst } 45617683Spst if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP) 45717683Spst use |= ATOMMASK(A_ATOM); 45817683Spst 45917683Spst b->def = def; 46017683Spst b->kill = kill; 46117683Spst b->in_use = use; 46217683Spst} 46317683Spst 46417683Spst/* 46517683Spst * Assume graph is already leveled. 
46617683Spst */ 46717683Spststatic void 46817683Spstfind_ud(root) 46917683Spst struct block *root; 47017683Spst{ 47117683Spst int i, maxlevel; 47217683Spst struct block *p; 47317683Spst 47417683Spst /* 47517683Spst * root->level is the highest level no found; 47617683Spst * count down from there. 47717683Spst */ 47817683Spst maxlevel = root->level; 47917683Spst for (i = maxlevel; i >= 0; --i) 48017683Spst for (p = levels[i]; p; p = p->link) { 48117683Spst compute_local_ud(p); 48217683Spst p->out_use = 0; 48317683Spst } 48417683Spst 48517683Spst for (i = 1; i <= maxlevel; ++i) { 48617683Spst for (p = levels[i]; p; p = p->link) { 48717683Spst p->out_use |= JT(p)->in_use | JF(p)->in_use; 48817683Spst p->in_use |= p->out_use &~ p->kill; 48917683Spst } 49017683Spst } 49117683Spst} 49217683Spst 49317683Spst/* 49417683Spst * These data structures are used in a Cocke and Shwarz style 49517683Spst * value numbering scheme. Since the flowgraph is acyclic, 49617683Spst * exit values can be propagated from a node's predecessors 49717683Spst * provided it is uniquely defined. 49817683Spst */ 49917683Spststruct valnode { 50017683Spst int code; 50117683Spst int v0, v1; 50217683Spst int val; 50317683Spst struct valnode *next; 50417683Spst}; 50517683Spst 50617683Spst#define MODULUS 213 50717683Spststatic struct valnode *hashtbl[MODULUS]; 50817683Spststatic int curval; 50917683Spststatic int maxval; 51017683Spst 51117683Spst/* Integer constants mapped with the load immediate opcode. 
*/ 51217683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 51317683Spst 51417683Spststruct vmapinfo { 51517683Spst int is_const; 51617683Spst bpf_int32 const_val; 51717683Spst}; 51817683Spst 51917683Spststruct vmapinfo *vmap; 52017683Spststruct valnode *vnode_base; 52117683Spststruct valnode *next_vnode; 52217683Spst 52317683Spststatic void 52417683Spstinit_val() 52517683Spst{ 52617683Spst curval = 0; 52717683Spst next_vnode = vnode_base; 52817683Spst memset((char *)vmap, 0, maxval * sizeof(*vmap)); 52917683Spst memset((char *)hashtbl, 0, sizeof hashtbl); 53017683Spst} 53117683Spst 53217683Spst/* Because we really don't have an IR, this stuff is a little messy. */ 53317683Spststatic int 53417683SpstF(code, v0, v1) 53517683Spst int code; 53617683Spst int v0, v1; 53717683Spst{ 53817683Spst u_int hash; 53917683Spst int val; 54017683Spst struct valnode *p; 54117683Spst 54217683Spst hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 54317683Spst hash %= MODULUS; 54417683Spst 54517683Spst for (p = hashtbl[hash]; p; p = p->next) 54617683Spst if (p->code == code && p->v0 == v0 && p->v1 == v1) 54717683Spst return p->val; 54817683Spst 54917683Spst val = ++curval; 55017683Spst if (BPF_MODE(code) == BPF_IMM && 55117683Spst (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 55217683Spst vmap[val].const_val = v0; 55317683Spst vmap[val].is_const = 1; 55417683Spst } 55517683Spst p = next_vnode++; 55617683Spst p->val = val; 55717683Spst p->code = code; 55817683Spst p->v0 = v0; 55917683Spst p->v1 = v1; 56017683Spst p->next = hashtbl[hash]; 56117683Spst hashtbl[hash] = p; 56217683Spst 56317683Spst return val; 56417683Spst} 56517683Spst 56617683Spststatic inline void 56717683Spstvstore(s, valp, newval, alter) 56817683Spst struct stmt *s; 56917683Spst int *valp; 57017683Spst int newval; 57117683Spst int alter; 57217683Spst{ 57317683Spst if (alter && *valp == newval) 57417683Spst s->code = NOP; 57517683Spst else 57617683Spst *valp = newval; 57717683Spst} 57817683Spst 
57917683Spststatic void 58017683Spstfold_op(s, v0, v1) 58117683Spst struct stmt *s; 58217683Spst int v0, v1; 58317683Spst{ 58417683Spst bpf_int32 a, b; 58517683Spst 58617683Spst a = vmap[v0].const_val; 58717683Spst b = vmap[v1].const_val; 58817683Spst 58917683Spst switch (BPF_OP(s->code)) { 59017683Spst case BPF_ADD: 59117683Spst a += b; 59217683Spst break; 59317683Spst 59417683Spst case BPF_SUB: 59517683Spst a -= b; 59617683Spst break; 59717683Spst 59817683Spst case BPF_MUL: 59917683Spst a *= b; 60017683Spst break; 60117683Spst 60217683Spst case BPF_DIV: 60317683Spst if (b == 0) 60417683Spst bpf_error("division by zero"); 60517683Spst a /= b; 60617683Spst break; 60717683Spst 60817683Spst case BPF_AND: 60917683Spst a &= b; 61017683Spst break; 61117683Spst 61217683Spst case BPF_OR: 61317683Spst a |= b; 61417683Spst break; 61517683Spst 61617683Spst case BPF_LSH: 61717683Spst a <<= b; 61817683Spst break; 61917683Spst 62017683Spst case BPF_RSH: 62117683Spst a >>= b; 62217683Spst break; 62317683Spst 62417683Spst case BPF_NEG: 62517683Spst a = -a; 62617683Spst break; 62717683Spst 62817683Spst default: 62917683Spst abort(); 63017683Spst } 63117683Spst s->k = a; 63217683Spst s->code = BPF_LD|BPF_IMM; 63317683Spst done = 0; 63417683Spst} 63517683Spst 63617683Spststatic inline struct slist * 63717683Spstthis_op(s) 63817683Spst struct slist *s; 63917683Spst{ 64017683Spst while (s != 0 && s->s.code == NOP) 64117683Spst s = s->next; 64217683Spst return s; 64317683Spst} 64417683Spst 64517683Spststatic void 64617683Spstopt_not(b) 64717683Spst struct block *b; 64817683Spst{ 64917683Spst struct block *tmp = JT(b); 65017683Spst 65117683Spst JT(b) = JF(b); 65217683Spst JF(b) = tmp; 65317683Spst} 65417683Spst 65517683Spststatic void 65617683Spstopt_peep(b) 65717683Spst struct block *b; 65817683Spst{ 65917683Spst struct slist *s; 66017683Spst struct slist *next, *last; 66117683Spst int val; 66217683Spst 66317683Spst s = b->stmts; 66417683Spst if (s == 0) 66517683Spst return; 
66617683Spst 66717683Spst last = s; 66817683Spst while (1) { 66917683Spst s = this_op(s); 67017683Spst if (s == 0) 67117683Spst break; 67217683Spst next = this_op(s->next); 67317683Spst if (next == 0) 67417683Spst break; 67517683Spst last = next; 67617683Spst 67717683Spst /* 67817683Spst * st M[k] --> st M[k] 67917683Spst * ldx M[k] tax 68017683Spst */ 68117683Spst if (s->s.code == BPF_ST && 68217683Spst next->s.code == (BPF_LDX|BPF_MEM) && 68317683Spst s->s.k == next->s.k) { 68417683Spst done = 0; 68517683Spst next->s.code = BPF_MISC|BPF_TAX; 68617683Spst } 68717683Spst /* 68817683Spst * ld #k --> ldx #k 68917683Spst * tax txa 69017683Spst */ 69117683Spst if (s->s.code == (BPF_LD|BPF_IMM) && 69217683Spst next->s.code == (BPF_MISC|BPF_TAX)) { 69317683Spst s->s.code = BPF_LDX|BPF_IMM; 69417683Spst next->s.code = BPF_MISC|BPF_TXA; 69517683Spst done = 0; 69617683Spst } 69717683Spst /* 69817683Spst * This is an ugly special case, but it happens 69917683Spst * when you say tcp[k] or udp[k] where k is a constant. 70017683Spst */ 70117683Spst if (s->s.code == (BPF_LD|BPF_IMM)) { 70217683Spst struct slist *add, *tax, *ild; 70317683Spst 70417683Spst /* 70517683Spst * Check that X isn't used on exit from this 70617683Spst * block (which the optimizer might cause). 70717683Spst * We know the code generator won't generate 70817683Spst * any local dependencies. 
70917683Spst */ 71017683Spst if (ATOMELEM(b->out_use, X_ATOM)) 71117683Spst break; 71217683Spst 71317683Spst if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 71417683Spst add = next; 71517683Spst else 71617683Spst add = this_op(next->next); 71717683Spst if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 71817683Spst break; 71917683Spst 72017683Spst tax = this_op(add->next); 72117683Spst if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 72217683Spst break; 72317683Spst 72417683Spst ild = this_op(tax->next); 72517683Spst if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 72617683Spst BPF_MODE(ild->s.code) != BPF_IND) 72717683Spst break; 72817683Spst /* 72917683Spst * XXX We need to check that X is not 73017683Spst * subsequently used. We know we can eliminate the 73117683Spst * accumulator modifications since it is defined 73217683Spst * by the last stmt of this sequence. 73317683Spst * 73417683Spst * We want to turn this sequence: 73517683Spst * 73617683Spst * (004) ldi #0x2 {s} 73717683Spst * (005) ldxms [14] {next} -- optional 73817683Spst * (006) addx {add} 73917683Spst * (007) tax {tax} 74017683Spst * (008) ild [x+0] {ild} 74117683Spst * 74217683Spst * into this sequence: 74317683Spst * 74417683Spst * (004) nop 74517683Spst * (005) ldxms [14] 74617683Spst * (006) nop 74717683Spst * (007) nop 74817683Spst * (008) ild [x+2] 74917683Spst * 75017683Spst */ 75117683Spst ild->s.k += s->s.k; 75217683Spst s->s.code = NOP; 75317683Spst add->s.code = NOP; 75417683Spst tax->s.code = NOP; 75517683Spst done = 0; 75617683Spst } 75717683Spst s = next; 75817683Spst } 75917683Spst /* 76017683Spst * If we have a subtract to do a comparison, and the X register 76117683Spst * is a known constant, we can merge this value into the 76217683Spst * comparison. 
76317683Spst */ 76417683Spst if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) && 76517683Spst !ATOMELEM(b->out_use, A_ATOM)) { 76617683Spst val = b->val[X_ATOM]; 76717683Spst if (vmap[val].is_const) { 76817683Spst int op; 76917683Spst 77017683Spst b->s.k += vmap[val].const_val; 77117683Spst op = BPF_OP(b->s.code); 77217683Spst if (op == BPF_JGT || op == BPF_JGE) { 77317683Spst struct block *t = JT(b); 77417683Spst JT(b) = JF(b); 77517683Spst JF(b) = t; 77617683Spst b->s.k += 0x80000000; 77717683Spst } 77817683Spst last->s.code = NOP; 77917683Spst done = 0; 78017683Spst } else if (b->s.k == 0) { 78117683Spst /* 78217683Spst * sub x -> nop 78317683Spst * j #0 j x 78417683Spst */ 78517683Spst last->s.code = NOP; 78617683Spst b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) | 78717683Spst BPF_X; 78817683Spst done = 0; 78917683Spst } 79017683Spst } 79117683Spst /* 79217683Spst * Likewise, a constant subtract can be simplified. 79317683Spst */ 79417683Spst else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) && 79517683Spst !ATOMELEM(b->out_use, A_ATOM)) { 79617683Spst int op; 79717683Spst 79817683Spst b->s.k += last->s.k; 79917683Spst last->s.code = NOP; 80017683Spst op = BPF_OP(b->s.code); 80117683Spst if (op == BPF_JGT || op == BPF_JGE) { 80217683Spst struct block *t = JT(b); 80317683Spst JT(b) = JF(b); 80417683Spst JF(b) = t; 80517683Spst b->s.k += 0x80000000; 80617683Spst } 80717683Spst done = 0; 80817683Spst } 80917683Spst /* 81017683Spst * and #k nop 81117683Spst * jeq #0 -> jset #k 81217683Spst */ 81317683Spst if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 81417683Spst !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) { 81517683Spst b->s.k = last->s.k; 81617683Spst b->s.code = BPF_JMP|BPF_K|BPF_JSET; 81717683Spst last->s.code = NOP; 81817683Spst done = 0; 81917683Spst opt_not(b); 82017683Spst } 82117683Spst /* 82217683Spst * If the accumulator is a known constant, we can compute the 82317683Spst * comparison result. 
82417683Spst */ 82517683Spst val = b->val[A_ATOM]; 82617683Spst if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 82717683Spst bpf_int32 v = vmap[val].const_val; 82817683Spst switch (BPF_OP(b->s.code)) { 82917683Spst 83017683Spst case BPF_JEQ: 83117683Spst v = v == b->s.k; 83217683Spst break; 83317683Spst 83417683Spst case BPF_JGT: 83517683Spst v = (unsigned)v > b->s.k; 83617683Spst break; 83717683Spst 83817683Spst case BPF_JGE: 83917683Spst v = (unsigned)v >= b->s.k; 84017683Spst break; 84117683Spst 84217683Spst case BPF_JSET: 84317683Spst v &= b->s.k; 84417683Spst break; 84517683Spst 84617683Spst default: 84717683Spst abort(); 84817683Spst } 84917683Spst if (JF(b) != JT(b)) 85017683Spst done = 0; 85117683Spst if (v) 85217683Spst JF(b) = JT(b); 85317683Spst else 85417683Spst JT(b) = JF(b); 85517683Spst } 85617683Spst} 85717683Spst 85817683Spst/* 85917683Spst * Compute the symbolic value of expression of 's', and update 86017683Spst * anything it defines in the value table 'val'. If 'alter' is true, 86117683Spst * do various optimizations. This code would be cleaner if symbolic 86217683Spst * evaluation and code transformations weren't folded together. 
86317683Spst */ 86417683Spststatic void 86517683Spstopt_stmt(s, val, alter) 86617683Spst struct stmt *s; 86717683Spst int val[]; 86817683Spst int alter; 86917683Spst{ 87017683Spst int op; 87117683Spst int v; 87217683Spst 87317683Spst switch (s->code) { 87417683Spst 87517683Spst case BPF_LD|BPF_ABS|BPF_W: 87617683Spst case BPF_LD|BPF_ABS|BPF_H: 87717683Spst case BPF_LD|BPF_ABS|BPF_B: 87817683Spst v = F(s->code, s->k, 0L); 87917683Spst vstore(s, &val[A_ATOM], v, alter); 88017683Spst break; 88117683Spst 88217683Spst case BPF_LD|BPF_IND|BPF_W: 88317683Spst case BPF_LD|BPF_IND|BPF_H: 88417683Spst case BPF_LD|BPF_IND|BPF_B: 88517683Spst v = val[X_ATOM]; 88617683Spst if (alter && vmap[v].is_const) { 88717683Spst s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 88817683Spst s->k += vmap[v].const_val; 88917683Spst v = F(s->code, s->k, 0L); 89017683Spst done = 0; 89117683Spst } 89217683Spst else 89317683Spst v = F(s->code, s->k, v); 89417683Spst vstore(s, &val[A_ATOM], v, alter); 89517683Spst break; 89617683Spst 89717683Spst case BPF_LD|BPF_LEN: 89817683Spst v = F(s->code, 0L, 0L); 89917683Spst vstore(s, &val[A_ATOM], v, alter); 90017683Spst break; 90117683Spst 90217683Spst case BPF_LD|BPF_IMM: 90317683Spst v = K(s->k); 90417683Spst vstore(s, &val[A_ATOM], v, alter); 90517683Spst break; 90617683Spst 90717683Spst case BPF_LDX|BPF_IMM: 90817683Spst v = K(s->k); 90917683Spst vstore(s, &val[X_ATOM], v, alter); 91017683Spst break; 91117683Spst 91217683Spst case BPF_LDX|BPF_MSH|BPF_B: 91317683Spst v = F(s->code, s->k, 0L); 91417683Spst vstore(s, &val[X_ATOM], v, alter); 91517683Spst break; 91617683Spst 91717683Spst case BPF_ALU|BPF_NEG: 91817683Spst if (alter && vmap[val[A_ATOM]].is_const) { 91917683Spst s->code = BPF_LD|BPF_IMM; 92017683Spst s->k = -vmap[val[A_ATOM]].const_val; 92117683Spst val[A_ATOM] = K(s->k); 92217683Spst } 92317683Spst else 92417683Spst val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 92517683Spst break; 92617683Spst 92717683Spst case BPF_ALU|BPF_ADD|BPF_K: 
92817683Spst case BPF_ALU|BPF_SUB|BPF_K: 92917683Spst case BPF_ALU|BPF_MUL|BPF_K: 93017683Spst case BPF_ALU|BPF_DIV|BPF_K: 93117683Spst case BPF_ALU|BPF_AND|BPF_K: 93217683Spst case BPF_ALU|BPF_OR|BPF_K: 93317683Spst case BPF_ALU|BPF_LSH|BPF_K: 93417683Spst case BPF_ALU|BPF_RSH|BPF_K: 93517683Spst op = BPF_OP(s->code); 93617683Spst if (alter) { 93717683Spst if (s->k == 0) { 93817683Spst if (op == BPF_ADD || op == BPF_SUB || 93917683Spst op == BPF_LSH || op == BPF_RSH || 94017683Spst op == BPF_OR) { 94117683Spst s->code = NOP; 94217683Spst break; 94317683Spst } 94417683Spst if (op == BPF_MUL || op == BPF_AND) { 94517683Spst s->code = BPF_LD|BPF_IMM; 94617683Spst val[A_ATOM] = K(s->k); 94717683Spst break; 94817683Spst } 94917683Spst } 95017683Spst if (vmap[val[A_ATOM]].is_const) { 95117683Spst fold_op(s, val[A_ATOM], K(s->k)); 95217683Spst val[A_ATOM] = K(s->k); 95317683Spst break; 95417683Spst } 95517683Spst } 95617683Spst val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 95717683Spst break; 95817683Spst 95917683Spst case BPF_ALU|BPF_ADD|BPF_X: 96017683Spst case BPF_ALU|BPF_SUB|BPF_X: 96117683Spst case BPF_ALU|BPF_MUL|BPF_X: 96217683Spst case BPF_ALU|BPF_DIV|BPF_X: 96317683Spst case BPF_ALU|BPF_AND|BPF_X: 96417683Spst case BPF_ALU|BPF_OR|BPF_X: 96517683Spst case BPF_ALU|BPF_LSH|BPF_X: 96617683Spst case BPF_ALU|BPF_RSH|BPF_X: 96717683Spst op = BPF_OP(s->code); 96817683Spst if (alter && vmap[val[X_ATOM]].is_const) { 96917683Spst if (vmap[val[A_ATOM]].is_const) { 97017683Spst fold_op(s, val[A_ATOM], val[X_ATOM]); 97117683Spst val[A_ATOM] = K(s->k); 97217683Spst } 97317683Spst else { 97417683Spst s->code = BPF_ALU|BPF_K|op; 97517683Spst s->k = vmap[val[X_ATOM]].const_val; 97617683Spst done = 0; 97717683Spst val[A_ATOM] = 97817683Spst F(s->code, val[A_ATOM], K(s->k)); 97917683Spst } 98017683Spst break; 98117683Spst } 98217683Spst /* 98317683Spst * Check if we're doing something to an accumulator 98417683Spst * that is 0, and simplify. 
This may not seem like 98517683Spst * much of a simplification but it could open up further 98617683Spst * optimizations. 98717683Spst * XXX We could also check for mul by 1, and -1, etc. 98817683Spst */ 98917683Spst if (alter && vmap[val[A_ATOM]].is_const 99017683Spst && vmap[val[A_ATOM]].const_val == 0) { 99117683Spst if (op == BPF_ADD || op == BPF_OR || 99217683Spst op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) { 99317683Spst s->code = BPF_MISC|BPF_TXA; 99417683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 99517683Spst break; 99617683Spst } 99717683Spst else if (op == BPF_MUL || op == BPF_DIV || 99817683Spst op == BPF_AND) { 99917683Spst s->code = BPF_LD|BPF_IMM; 100017683Spst s->k = 0; 100117683Spst vstore(s, &val[A_ATOM], K(s->k), alter); 100217683Spst break; 100317683Spst } 100417683Spst else if (op == BPF_NEG) { 100517683Spst s->code = NOP; 100617683Spst break; 100717683Spst } 100817683Spst } 100917683Spst val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 101017683Spst break; 101117683Spst 101217683Spst case BPF_MISC|BPF_TXA: 101317683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 101417683Spst break; 101517683Spst 101617683Spst case BPF_LD|BPF_MEM: 101717683Spst v = val[s->k]; 101817683Spst if (alter && vmap[v].is_const) { 101917683Spst s->code = BPF_LD|BPF_IMM; 102017683Spst s->k = vmap[v].const_val; 102117683Spst done = 0; 102217683Spst } 102317683Spst vstore(s, &val[A_ATOM], v, alter); 102417683Spst break; 102517683Spst 102617683Spst case BPF_MISC|BPF_TAX: 102717683Spst vstore(s, &val[X_ATOM], val[A_ATOM], alter); 102817683Spst break; 102917683Spst 103017683Spst case BPF_LDX|BPF_MEM: 103117683Spst v = val[s->k]; 103217683Spst if (alter && vmap[v].is_const) { 103317683Spst s->code = BPF_LDX|BPF_IMM; 103417683Spst s->k = vmap[v].const_val; 103517683Spst done = 0; 103617683Spst } 103717683Spst vstore(s, &val[X_ATOM], v, alter); 103817683Spst break; 103917683Spst 104017683Spst case BPF_ST: 104117683Spst vstore(s, &val[s->k], val[A_ATOM], alter); 
104217683Spst break; 104317683Spst 104417683Spst case BPF_STX: 104517683Spst vstore(s, &val[s->k], val[X_ATOM], alter); 104617683Spst break; 104717683Spst } 104817683Spst} 104917683Spst 105017683Spststatic void 105117683Spstdeadstmt(s, last) 105217683Spst register struct stmt *s; 105317683Spst register struct stmt *last[]; 105417683Spst{ 105517683Spst register int atom; 105617683Spst 105717683Spst atom = atomuse(s); 105817683Spst if (atom >= 0) { 105917683Spst if (atom == AX_ATOM) { 106017683Spst last[X_ATOM] = 0; 106117683Spst last[A_ATOM] = 0; 106217683Spst } 106317683Spst else 106417683Spst last[atom] = 0; 106517683Spst } 106617683Spst atom = atomdef(s); 106717683Spst if (atom >= 0) { 106817683Spst if (last[atom]) { 106917683Spst done = 0; 107017683Spst last[atom]->code = NOP; 107117683Spst } 107217683Spst last[atom] = s; 107317683Spst } 107417683Spst} 107517683Spst 107617683Spststatic void 107717683Spstopt_deadstores(b) 107817683Spst register struct block *b; 107917683Spst{ 108017683Spst register struct slist *s; 108117683Spst register int atom; 108217683Spst struct stmt *last[N_ATOMS]; 108317683Spst 108417683Spst memset((char *)last, 0, sizeof last); 108517683Spst 108617683Spst for (s = b->stmts; s != 0; s = s->next) 108717683Spst deadstmt(&s->s, last); 108817683Spst deadstmt(&b->s, last); 108917683Spst 109017683Spst for (atom = 0; atom < N_ATOMS; ++atom) 109117683Spst if (last[atom] && !ATOMELEM(b->out_use, atom)) { 109217683Spst last[atom]->code = NOP; 109317683Spst done = 0; 109417683Spst } 109517683Spst} 109617683Spst 109717683Spststatic void 109817683Spstopt_blk(b, do_stmts) 109917683Spst struct block *b; 110017683Spst int do_stmts; 110117683Spst{ 110217683Spst struct slist *s; 110317683Spst struct edge *p; 110417683Spst int i; 110517683Spst bpf_int32 aval; 110617683Spst 110717683Spst /* 110817683Spst * Initialize the atom values. 110917683Spst * If we have no predecessors, everything is undefined. 
111017683Spst * Otherwise, we inherent our values from our predecessors. 111117683Spst * If any register has an ambiguous value (i.e. control paths are 111217683Spst * merging) give it the undefined value of 0. 111317683Spst */ 111417683Spst p = b->in_edges; 111517683Spst if (p == 0) 111617683Spst memset((char *)b->val, 0, sizeof(b->val)); 111717683Spst else { 111817683Spst memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 111917683Spst while ((p = p->next) != NULL) { 112017683Spst for (i = 0; i < N_ATOMS; ++i) 112117683Spst if (b->val[i] != p->pred->val[i]) 112217683Spst b->val[i] = 0; 112317683Spst } 112417683Spst } 112517683Spst aval = b->val[A_ATOM]; 112617683Spst for (s = b->stmts; s; s = s->next) 112717683Spst opt_stmt(&s->s, b->val, do_stmts); 112817683Spst 112917683Spst /* 113017683Spst * This is a special case: if we don't use anything from this 113117683Spst * block, and we load the accumulator with value that is 113217683Spst * already there, or if this block is a return, 113317683Spst * eliminate all the statements. 113417683Spst */ 113517683Spst if (do_stmts && 113617683Spst ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) || 113717683Spst BPF_CLASS(b->s.code) == BPF_RET)) { 113817683Spst if (b->stmts != 0) { 113917683Spst b->stmts = 0; 114017683Spst done = 0; 114117683Spst } 114217683Spst } else { 114317683Spst opt_peep(b); 114417683Spst opt_deadstores(b); 114517683Spst } 114617683Spst /* 114717683Spst * Set up values for branch optimizer. 114817683Spst */ 114917683Spst if (BPF_SRC(b->s.code) == BPF_K) 115017683Spst b->oval = K(b->s.k); 115117683Spst else 115217683Spst b->oval = b->val[X_ATOM]; 115317683Spst b->et.code = b->s.code; 115417683Spst b->ef.code = -b->s.code; 115517683Spst} 115617683Spst 115717683Spst/* 115817683Spst * Return true if any register that is used on exit from 'succ', has 115917683Spst * an exit value that is different from the corresponding exit value 116017683Spst * from 'b'. 
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	/* Nothing live-out of 'succ' means no possible conflict. */
	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

/*
 * Given an edge 'ep' whose predecessor has already evaluated the same
 * comparison as block 'child', return the child's successor that must
 * be taken (JT or JF), or 0 if the branch outcome cannot be deduced.
 */
static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	/* A negative code means we arrived via the false branch. */
	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	/* Accumulators must hold the same value number. */
	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}

/*
 * Branch optimization for a single edge: retarget the edge past
 * successors whose branch outcome is statically known.
 */
static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	/* Leaf (return) blocks have no branches to optimize. */
	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		/* Scan the edge-dominator bitset word by word. */
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


/*
 * Pull up through a chain of blocks that all perform the same true-branch
 * comparison as 'b', the first block whose accumulator value differs from
 * the chain's common value (an OR-chain rewrite).
 */
static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/* Walk down the false-branch chain to the first differing block. */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/* From there, find the next block whose value matches again. */
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * Mirror image of or_pullup(): operates along true-branch chains whose
 * blocks share the same false target (an AND-chain rewrite).
 */
static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/* Walk down the true-branch chain to the first differing block. */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	/* From there, find the next block whose value matches again. */
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * One optimizer pass over all blocks: per-block optimization from the
 * deepest level upward, then (when not doing statement elimination)
 * branch movement and pull-ups from the top down.
 */
static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

/* Prepend edge 'parent' to the in-edge list of block 'child'. */
static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

/*
 * Rebuild every block's predecessor (in-edge) list from scratch.
 */
static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

/*
 * Skip over root blocks that are pointless branches (both targets equal),
 * carrying their statements down to the new root.
 */
static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

/*
 * Iterate full optimizer passes until a pass makes no change
 * (the global 'done' flag survives as 1).
 */
static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		/* Recompute all analyses; graph shape may have changed. */
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
158517683Spst */ 158617683Spstvoid 158717683Spstbpf_optimize(rootp) 158817683Spst struct block **rootp; 158917683Spst{ 159017683Spst struct block *root; 159117683Spst 159217683Spst root = *rootp; 159317683Spst 159417683Spst opt_init(root); 159517683Spst opt_loop(root, 0); 159617683Spst opt_loop(root, 1); 159717683Spst intern_blocks(root); 159817683Spst opt_root(rootp); 159917683Spst opt_cleanup(); 160017683Spst} 160117683Spst 160217683Spststatic void 160317683Spstmake_marks(p) 160417683Spst struct block *p; 160517683Spst{ 160617683Spst if (!isMarked(p)) { 160717683Spst Mark(p); 160817683Spst if (BPF_CLASS(p->s.code) != BPF_RET) { 160917683Spst make_marks(JT(p)); 161017683Spst make_marks(JF(p)); 161117683Spst } 161217683Spst } 161317683Spst} 161417683Spst 161517683Spst/* 161617683Spst * Mark code array such that isMarked(i) is true 161717683Spst * only for nodes that are alive. 161817683Spst */ 161917683Spststatic void 162017683Spstmark_code(p) 162117683Spst struct block *p; 162217683Spst{ 162317683Spst cur_mark += 1; 162417683Spst make_marks(p); 162517683Spst} 162617683Spst 162717683Spst/* 162817683Spst * True iff the two stmt lists load the same value from the packet into 162917683Spst * the accumulator. 
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	/* Compare statement by statement, skipping NOPs on both sides. */
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

/*
 * True iff blocks 'b0' and 'b1' are equivalent: same branch, same
 * successors, and equivalent statement lists.
 */
static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

/*
 * Merge equivalent blocks: for each live block, link it to an equivalent
 * block later in the array, then retarget all branches through the links.
 * Repeats until a fixed point, since merging can create new equivalences.
 */
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;	/* local; intentionally shadows the file-scope flag */
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	/* Only blocks reachable from the root participate. */
	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				/* Point at the end of the chain if one exists. */
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

/* Release everything allocated by opt_init(). */
static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	/* NOPs will be deleted before code generation; don't count them. */
	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
173617683Spst */ 173717683Spststatic int 173817683Spstcount_blocks(p) 173917683Spst struct block *p; 174017683Spst{ 174117683Spst if (p == 0 || isMarked(p)) 174217683Spst return 0; 174317683Spst Mark(p); 174417683Spst return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 174517683Spst} 174617683Spst 174717683Spst/* 174817683Spst * Do a depth first search on the flow graph, numbering the 174917683Spst * the basic blocks, and entering them into the 'blocks' array.` 175017683Spst */ 175117683Spststatic void 175217683Spstnumber_blks_r(p) 175317683Spst struct block *p; 175417683Spst{ 175517683Spst int n; 175617683Spst 175717683Spst if (p == 0 || isMarked(p)) 175817683Spst return; 175917683Spst 176017683Spst Mark(p); 176117683Spst n = n_blocks++; 176217683Spst p->id = n; 176317683Spst blocks[n] = p; 176417683Spst 176517683Spst number_blks_r(JT(p)); 176617683Spst number_blks_r(JF(p)); 176717683Spst} 176817683Spst 176917683Spst/* 177017683Spst * Return the number of stmts in the flowgraph reachable by 'p'. 177117683Spst * The nodes should be unmarked before calling. 177217683Spst */ 177317683Spststatic int 177417683Spstcount_stmts(p) 177517683Spst struct block *p; 177617683Spst{ 177717683Spst int n; 177817683Spst 177917683Spst if (p == 0 || isMarked(p)) 178017683Spst return 0; 178117683Spst Mark(p); 178217683Spst n = count_stmts(JT(p)) + count_stmts(JF(p)); 178317683Spst return slength(p->stmts) + n + 1; 178417683Spst} 178517683Spst 178617683Spst/* 178717683Spst * Allocate memory. All allocation is done before optimization 178817683Spst * is begun. A linear bound on the size of all data structures is computed 178917683Spst * from the total number of blocks and/or statements. 
179017683Spst */ 179117683Spststatic void 179217683Spstopt_init(root) 179317683Spst struct block *root; 179417683Spst{ 179517683Spst bpf_u_int32 *p; 179617683Spst int i, n, max_stmts; 179717683Spst 179817683Spst /* 179917683Spst * First, count the blocks, so we can malloc an array to map 180017683Spst * block number to block. Then, put the blocks into the array. 180117683Spst */ 180217683Spst unMarkAll(); 180317683Spst n = count_blocks(root); 180417683Spst blocks = (struct block **)malloc(n * sizeof(*blocks)); 180517683Spst unMarkAll(); 180617683Spst n_blocks = 0; 180717683Spst number_blks_r(root); 180817683Spst 180917683Spst n_edges = 2 * n_blocks; 181017683Spst edges = (struct edge **)malloc(n_edges * sizeof(*edges)); 181117683Spst 181217683Spst /* 181317683Spst * The number of levels is bounded by the number of nodes. 181417683Spst */ 181517683Spst levels = (struct block **)malloc(n_blocks * sizeof(*levels)); 181617683Spst 181717683Spst edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 181817683Spst nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 181917683Spst 182017683Spst /* XXX */ 182117683Spst space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 182217683Spst + n_edges * edgewords * sizeof(*space)); 182317683Spst p = space; 182417683Spst all_dom_sets = p; 182517683Spst for (i = 0; i < n; ++i) { 182617683Spst blocks[i]->dom = p; 182717683Spst p += nodewords; 182817683Spst } 182917683Spst all_closure_sets = p; 183017683Spst for (i = 0; i < n; ++i) { 183117683Spst blocks[i]->closure = p; 183217683Spst p += nodewords; 183317683Spst } 183417683Spst all_edge_sets = p; 183517683Spst for (i = 0; i < n; ++i) { 183617683Spst register struct block *b = blocks[i]; 183717683Spst 183817683Spst b->et.edom = p; 183917683Spst p += edgewords; 184017683Spst b->ef.edom = p; 184117683Spst p += edgewords; 184217683Spst b->et.id = i; 184317683Spst edges[i] = &b->et; 184417683Spst b->ef.id = n_blocks + i; 184517683Spst edges[n_blocks + i] = &b->ef; 
184617683Spst b->et.pred = b; 184717683Spst b->ef.pred = b; 184817683Spst } 184917683Spst max_stmts = 0; 185017683Spst for (i = 0; i < n; ++i) 185117683Spst max_stmts += slength(blocks[i]->stmts) + 1; 185217683Spst /* 185317683Spst * We allocate at most 3 value numbers per statement, 185417683Spst * so this is an upper bound on the number of valnodes 185517683Spst * we'll need. 185617683Spst */ 185717683Spst maxval = 3 * max_stmts; 185817683Spst vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap)); 185917683Spst vnode_base = (struct valnode *)malloc(maxval * sizeof(*vmap)); 186017683Spst} 186117683Spst 186217683Spst/* 186317683Spst * Some pointers used to convert the basic block form of the code, 186417683Spst * into the array form that BPF requires. 'fstart' will point to 186517683Spst * the malloc'd array while 'ftail' is used during the recursive traversal. 186617683Spst */ 186717683Spststatic struct bpf_insn *fstart; 186817683Spststatic struct bpf_insn *ftail; 186917683Spst 187017683Spst#ifdef BDEBUG 187117683Spstint bids[1000]; 187217683Spst#endif 187317683Spst 187417683Spst/* 187517683Spst * Returns true if successful. Returns false if a branch has 187617683Spst * an offset that is too large. If so, we have marked that 187717683Spst * branch so that on a subsequent iteration, it will be treated 187817683Spst * properly. 
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	/*
	 * Emit successors first: the array is filled from the tail
	 * backward, so both targets must already have their offsets.
	 */
	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;
		++dst;
	}
#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	/* 'dst' now points at the block's branch/return instruction. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		/* jt/jf are 8-bit; offsets are relative to the next insn. */
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
		unMarkAll();
		n = *lenp = count_stmts(root);

		/* XXX malloc result unchecked; NULL would fault in memset. */
		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		memset((char *)fp, 0, sizeof(*fp) * n);
		fstart = fp;
		ftail = fp + n;

		unMarkAll();
		if (convert_code_r(root))
			break;
		/* Some branch was marked long; recount and start over. */
		free(fp);
	}

	return fp;
}

#ifdef BDEBUG
/* Debugging aid: dump the generated program (requires -d -d). */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif