optimize.c revision 56889
117683Spst/* 217683Spst * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 317683Spst * The Regents of the University of California. All rights reserved. 417683Spst * 517683Spst * Redistribution and use in source and binary forms, with or without 617683Spst * modification, are permitted provided that: (1) source code distributions 717683Spst * retain the above copyright notice and this paragraph in its entirety, (2) 817683Spst * distributions including binary code include the above copyright notice and 917683Spst * this paragraph in its entirety in the documentation or other materials 1017683Spst * provided with the distribution, and (3) all advertising materials mentioning 1117683Spst * features or use of this software display the following acknowledgement: 1217683Spst * ``This product includes software developed by the University of California, 1317683Spst * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 1417683Spst * the University nor the names of its contributors may be used to endorse 1517683Spst * or promote products derived from this software without specific prior 1617683Spst * written permission. 1717683Spst * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 1817683Spst * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 1917683Spst * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 2017683Spst * 2117683Spst * Optimization module for tcpdump intermediate representation. 2217683Spst */ 2317683Spst#ifndef lint 2426175Sfennerstatic const char rcsid[] = 2556889Sfenner "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.61 1999/10/19 15:18:30 itojun Exp $ (LBL)"; 2617683Spst#endif 2717683Spst 2817683Spst#include <sys/types.h> 2917683Spst#include <sys/time.h> 3017683Spst 3117683Spst#include <stdio.h> 3217683Spst#include <stdlib.h> 3317683Spst#include <memory.h> 3417683Spst 3517683Spst#include "pcap-int.h" 3617683Spst 3717683Spst#include "gencode.h" 3817683Spst 3917683Spst#include "gnuc.h" 4017683Spst#ifdef HAVE_OS_PROTO_H 4117683Spst#include "os-proto.h" 4217683Spst#endif 4317683Spst 4417683Spst#ifdef BDEBUG 4517683Spstextern int dflag; 4617683Spst#endif 4717683Spst 4817683Spst#define A_ATOM BPF_MEMWORDS 4917683Spst#define X_ATOM (BPF_MEMWORDS+1) 5017683Spst 5117683Spst#define NOP -1 5217683Spst 5317683Spst/* 5417683Spst * This define is used to represent *both* the accumulator and 5517683Spst * x register in use-def computations. 5617683Spst * Currently, the use-def code assumes only one definition per instruction. 5717683Spst */ 5817683Spst#define AX_ATOM N_ATOMS 5917683Spst 6017683Spst/* 6117683Spst * A flag to indicate that further optimization is needed. 6217683Spst * Iterative passes are continued until a given pass yields no 6317683Spst * branch movement. 6417683Spst */ 6517683Spststatic int done; 6617683Spst 6717683Spst/* 6817683Spst * A block is marked if only if its mark equals the current mark. 6917683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is 7017683Spst * incremented. This automatically makes each element unmarked. 7117683Spst */ 7217683Spststatic int cur_mark; 7317683Spst#define isMarked(p) ((p)->mark == cur_mark) 7417683Spst#define unMarkAll() cur_mark += 1 7517683Spst#define Mark(p) ((p)->mark = cur_mark) 7617683Spst 7717683Spststatic void opt_init(struct block *); 7817683Spststatic void opt_cleanup(void); 7917683Spst 8017683Spststatic void make_marks(struct block *); 8117683Spststatic void mark_code(struct block *); 8217683Spst 8317683Spststatic void intern_blocks(struct block *); 8417683Spst 8517683Spststatic int eq_slist(struct slist *, struct slist *); 8617683Spst 8717683Spststatic void find_levels_r(struct block *); 8817683Spst 8917683Spststatic void find_levels(struct block *); 9017683Spststatic void find_dom(struct block *); 9117683Spststatic void propedom(struct edge *); 9217683Spststatic void find_edom(struct block *); 9317683Spststatic void find_closure(struct block *); 9417683Spststatic int atomuse(struct stmt *); 9517683Spststatic int atomdef(struct stmt *); 9617683Spststatic void compute_local_ud(struct block *); 9717683Spststatic void find_ud(struct block *); 9817683Spststatic void init_val(void); 9917683Spststatic int F(int, int, int); 10017683Spststatic inline void vstore(struct stmt *, int *, int, int); 10117683Spststatic void opt_blk(struct block *, int); 10217683Spststatic int use_conflict(struct block *, struct block *); 10317683Spststatic void opt_j(struct edge *); 10417683Spststatic void or_pullup(struct block *); 10517683Spststatic void and_pullup(struct block *); 10617683Spststatic void opt_blks(struct block *, int); 10717683Spststatic inline void link_inedge(struct edge *, struct block *); 10817683Spststatic void find_inedges(struct block *); 10917683Spststatic void opt_root(struct block **); 11017683Spststatic void opt_loop(struct block *, int); 11117683Spststatic void fold_op(struct stmt *, int, int); 11217683Spststatic inline struct slist *this_op(struct slist *); 11317683Spststatic void opt_not(struct block *); 11417683Spststatic void opt_peep(struct block *); 11517683Spststatic void opt_stmt(struct stmt *, int[], int); 11617683Spststatic void deadstmt(struct stmt *, struct stmt *[]); 11717683Spststatic void opt_deadstores(struct block *); 11817683Spststatic void opt_blk(struct block *, int); 11917683Spststatic int use_conflict(struct block *, struct block *); 12017683Spststatic void opt_j(struct edge *); 12117683Spststatic struct block *fold_edge(struct block *, struct edge *); 12217683Spststatic inline int eq_blk(struct block *, struct block *); 12317683Spststatic int slength(struct slist *); 12417683Spststatic int count_blocks(struct block *); 12517683Spststatic void number_blks_r(struct block *); 12617683Spststatic int count_stmts(struct block *); 12717683Spststatic int convert_code_r(struct block *); 12817683Spst#ifdef BDEBUG 12917683Spststatic void opt_dump(struct block *); 13017683Spst#endif 13117683Spst 13217683Spststatic int n_blocks; 13317683Spststruct block **blocks; 13417683Spststatic int n_edges; 13517683Spststruct edge **edges; 13617683Spst 13717683Spst/* 13817683Spst * A bit vector set representation of the dominators. 13917683Spst * We round up the set size to the next power of two. 14017683Spst */ 14117683Spststatic int nodewords; 14217683Spststatic int edgewords; 14317683Spststruct block **levels; 14417683Spstbpf_u_int32 *space; 14517683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 14617683Spst/* 14717683Spst * True if a is in uset {p} 14817683Spst */ 14917683Spst#define SET_MEMBER(p, a) \ 15017683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 15117683Spst 15217683Spst/* 15317683Spst * Add 'a' to uset p. 15417683Spst */ 15517683Spst#define SET_INSERT(p, a) \ 15617683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 15717683Spst 15817683Spst/* 15917683Spst * Delete 'a' from uset p. 16017683Spst */ 16117683Spst#define SET_DELETE(p, a) \ 16217683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 16317683Spst 16417683Spst/* 16517683Spst * a := a intersect b 16617683Spst */ 16717683Spst#define SET_INTERSECT(a, b, n)\ 16817683Spst{\ 16917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 17017683Spst register int _n = n;\ 17117683Spst while (--_n >= 0) *_x++ &= *_y++;\ 17217683Spst} 17317683Spst 17417683Spst/* 17517683Spst * a := a - b 17617683Spst */ 17717683Spst#define SET_SUBTRACT(a, b, n)\ 17817683Spst{\ 17917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 18017683Spst register int _n = n;\ 18117683Spst while (--_n >= 0) *_x++ &=~ *_y++;\ 18217683Spst} 18317683Spst 18417683Spst/* 18517683Spst * a := a union b 18617683Spst */ 18717683Spst#define SET_UNION(a, b, n)\ 18817683Spst{\ 18917683Spst register bpf_u_int32 *_x = a, *_y = b;\ 19017683Spst register int _n = n;\ 19117683Spst while (--_n >= 0) *_x++ |= *_y++;\ 19217683Spst} 19317683Spst 19417683Spststatic uset all_dom_sets; 19517683Spststatic uset all_closure_sets; 19617683Spststatic uset all_edge_sets; 19717683Spst 19817683Spst#ifndef MAX 19917683Spst#define MAX(a,b) ((a)>(b)?(a):(b)) 20017683Spst#endif 20117683Spst 20217683Spststatic void 20317683Spstfind_levels_r(b) 20417683Spst struct block *b; 20517683Spst{ 20617683Spst int level; 20717683Spst 20817683Spst if (isMarked(b)) 20917683Spst return; 21017683Spst 21117683Spst Mark(b); 21217683Spst b->link = 0; 21317683Spst 21417683Spst if (JT(b)) { 21517683Spst find_levels_r(JT(b)); 21617683Spst find_levels_r(JF(b)); 21717683Spst level = MAX(JT(b)->level, JF(b)->level) + 1; 21817683Spst } else 21917683Spst level = 0; 22017683Spst b->level = level; 22117683Spst b->link = levels[level]; 22217683Spst levels[level] = b; 22317683Spst} 22417683Spst 22517683Spst/* 22617683Spst * Level graph. The levels go from 0 at the leaves to 22717683Spst * N_LEVELS at the root. The levels[] array points to the 22817683Spst * first node of the level list, whose elements are linked 22917683Spst * with the 'link' field of the struct block. 23017683Spst */ 23117683Spststatic void 23217683Spstfind_levels(root) 23317683Spst struct block *root; 23417683Spst{ 23517683Spst memset((char *)levels, 0, n_blocks * sizeof(*levels)); 23617683Spst unMarkAll(); 23717683Spst find_levels_r(root); 23817683Spst} 23917683Spst 24017683Spst/* 24117683Spst * Find dominator relationships. 24217683Spst * Assumes graph has been leveled. 24317683Spst */ 24417683Spststatic void 24517683Spstfind_dom(root) 24617683Spst struct block *root; 24717683Spst{ 24817683Spst int i; 24917683Spst struct block *b; 25017683Spst bpf_u_int32 *x; 25117683Spst 25217683Spst /* 25317683Spst * Initialize sets to contain all nodes. 25417683Spst */ 25517683Spst x = all_dom_sets; 25617683Spst i = n_blocks * nodewords; 25717683Spst while (--i >= 0) 25817683Spst *x++ = ~0; 25917683Spst /* Root starts off empty. */ 26017683Spst for (i = nodewords; --i >= 0;) 26117683Spst root->dom[i] = 0; 26217683Spst 26317683Spst /* root->level is the highest level no found. */ 26417683Spst for (i = root->level; i >= 0; --i) { 26517683Spst for (b = levels[i]; b; b = b->link) { 26617683Spst SET_INSERT(b->dom, b->id); 26717683Spst if (JT(b) == 0) 26817683Spst continue; 26917683Spst SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 27017683Spst SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 27117683Spst } 27217683Spst } 27317683Spst} 27417683Spst 27517683Spststatic void 27617683Spstpropedom(ep) 27717683Spst struct edge *ep; 27817683Spst{ 27917683Spst SET_INSERT(ep->edom, ep->id); 28017683Spst if (ep->succ) { 28117683Spst SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 28217683Spst SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 28317683Spst } 28417683Spst} 28517683Spst 28617683Spst/* 28717683Spst * Compute edge dominators. 28817683Spst * Assumes graph has been leveled and predecessors established. 28917683Spst */ 29017683Spststatic void 29117683Spstfind_edom(root) 29217683Spst struct block *root; 29317683Spst{ 29417683Spst int i; 29517683Spst uset x; 29617683Spst struct block *b; 29717683Spst 29817683Spst x = all_edge_sets; 29917683Spst for (i = n_edges * edgewords; --i >= 0; ) 30017683Spst x[i] = ~0; 30117683Spst 30217683Spst /* root->level is the highest level no found. */ 30317683Spst memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 30417683Spst memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 30517683Spst for (i = root->level; i >= 0; --i) { 30617683Spst for (b = levels[i]; b != 0; b = b->link) { 30717683Spst propedom(&b->et); 30817683Spst propedom(&b->ef); 30917683Spst } 31017683Spst } 31117683Spst} 31217683Spst 31317683Spst/* 31417683Spst * Find the backwards transitive closure of the flow graph. These sets 31517683Spst * are backwards in the sense that we find the set of nodes that reach 31617683Spst * a given node, not the set of nodes that can be reached by a node. 31717683Spst * 31817683Spst * Assumes graph has been leveled. 31917683Spst */ 32017683Spststatic void 32117683Spstfind_closure(root) 32217683Spst struct block *root; 32317683Spst{ 32417683Spst int i; 32517683Spst struct block *b; 32617683Spst 32717683Spst /* 32817683Spst * Initialize sets to contain no nodes. 32917683Spst */ 33017683Spst memset((char *)all_closure_sets, 0, 33117683Spst n_blocks * nodewords * sizeof(*all_closure_sets)); 33217683Spst 33317683Spst /* root->level is the highest level no found. */ 33417683Spst for (i = root->level; i >= 0; --i) { 33517683Spst for (b = levels[i]; b; b = b->link) { 33617683Spst SET_INSERT(b->closure, b->id); 33717683Spst if (JT(b) == 0) 33817683Spst continue; 33917683Spst SET_UNION(JT(b)->closure, b->closure, nodewords); 34017683Spst SET_UNION(JF(b)->closure, b->closure, nodewords); 34117683Spst } 34217683Spst } 34317683Spst} 34417683Spst 34517683Spst/* 34617683Spst * Return the register number that is used by s. If A and X are both 34717683Spst * used, return AX_ATOM. If no register is used, return -1. 34817683Spst * 34917683Spst * The implementation should probably change to an array access. 35017683Spst */ 35117683Spststatic int 35217683Spstatomuse(s) 35317683Spst struct stmt *s; 35417683Spst{ 35517683Spst register int c = s->code; 35617683Spst 35717683Spst if (c == NOP) 35817683Spst return -1; 35917683Spst 36017683Spst switch (BPF_CLASS(c)) { 36117683Spst 36217683Spst case BPF_RET: 36317683Spst return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 36417683Spst (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 36517683Spst 36617683Spst case BPF_LD: 36717683Spst case BPF_LDX: 36817683Spst return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 36917683Spst (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 37017683Spst 37117683Spst case BPF_ST: 37217683Spst return A_ATOM; 37317683Spst 37417683Spst case BPF_STX: 37517683Spst return X_ATOM; 37617683Spst 37717683Spst case BPF_JMP: 37817683Spst case BPF_ALU: 37917683Spst if (BPF_SRC(c) == BPF_X) 38017683Spst return AX_ATOM; 38117683Spst return A_ATOM; 38217683Spst 38317683Spst case BPF_MISC: 38417683Spst return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 38517683Spst } 38617683Spst abort(); 38717683Spst /* NOTREACHED */ 38817683Spst} 38917683Spst 39017683Spst/* 39117683Spst * Return the register number that is defined by 's'. We assume that 39217683Spst * a single stmt cannot define more than one register. If no register 39317683Spst * is defined, return -1. 39417683Spst * 39517683Spst * The implementation should probably change to an array access. 39617683Spst */ 39717683Spststatic int 39817683Spstatomdef(s) 39917683Spst struct stmt *s; 40017683Spst{ 40117683Spst if (s->code == NOP) 40217683Spst return -1; 40317683Spst 40417683Spst switch (BPF_CLASS(s->code)) { 40517683Spst 40617683Spst case BPF_LD: 40717683Spst case BPF_ALU: 40817683Spst return A_ATOM; 40917683Spst 41017683Spst case BPF_LDX: 41117683Spst return X_ATOM; 41217683Spst 41317683Spst case BPF_ST: 41417683Spst case BPF_STX: 41517683Spst return s->k; 41617683Spst 41717683Spst case BPF_MISC: 41817683Spst return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 41917683Spst } 42017683Spst return -1; 42117683Spst} 42217683Spst 42317683Spststatic void 42417683Spstcompute_local_ud(b) 42517683Spst struct block *b; 42617683Spst{ 42717683Spst struct slist *s; 42817683Spst atomset def = 0, use = 0, kill = 0; 42917683Spst int atom; 43017683Spst 43117683Spst for (s = b->stmts; s; s = s->next) { 43217683Spst if (s->s.code == NOP) 43317683Spst continue; 43417683Spst atom = atomuse(&s->s); 43517683Spst if (atom >= 0) { 43617683Spst if (atom == AX_ATOM) { 43717683Spst if (!ATOMELEM(def, X_ATOM)) 43817683Spst use |= ATOMMASK(X_ATOM); 43917683Spst if (!ATOMELEM(def, A_ATOM)) 44017683Spst use |= ATOMMASK(A_ATOM); 44117683Spst } 44217683Spst else if (atom < N_ATOMS) { 44317683Spst if (!ATOMELEM(def, atom)) 44417683Spst use |= ATOMMASK(atom); 44517683Spst } 44617683Spst else 44717683Spst abort(); 44817683Spst } 44917683Spst atom = atomdef(&s->s); 45017683Spst if (atom >= 0) { 45117683Spst if (!ATOMELEM(use, atom)) 45217683Spst kill |= ATOMMASK(atom); 45317683Spst def |= ATOMMASK(atom); 45417683Spst } 45517683Spst } 45617683Spst if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP) 45717683Spst use |= ATOMMASK(A_ATOM); 45817683Spst 45917683Spst b->def = def; 46017683Spst b->kill = kill; 46117683Spst b->in_use = use; 46217683Spst} 46317683Spst 46417683Spst/* 46517683Spst * Assume graph is already leveled. 46617683Spst */ 46717683Spststatic void 46817683Spstfind_ud(root) 46917683Spst struct block *root; 47017683Spst{ 47117683Spst int i, maxlevel; 47217683Spst struct block *p; 47317683Spst 47417683Spst /* 47517683Spst * root->level is the highest level no found; 47617683Spst * count down from there. 47717683Spst */ 47817683Spst maxlevel = root->level; 47917683Spst for (i = maxlevel; i >= 0; --i) 48017683Spst for (p = levels[i]; p; p = p->link) { 48117683Spst compute_local_ud(p); 48217683Spst p->out_use = 0; 48317683Spst } 48417683Spst 48517683Spst for (i = 1; i <= maxlevel; ++i) { 48617683Spst for (p = levels[i]; p; p = p->link) { 48717683Spst p->out_use |= JT(p)->in_use | JF(p)->in_use; 48817683Spst p->in_use |= p->out_use &~ p->kill; 48917683Spst } 49017683Spst } 49117683Spst} 49217683Spst 49317683Spst/* 49417683Spst * These data structures are used in a Cocke and Shwarz style 49517683Spst * value numbering scheme. Since the flowgraph is acyclic, 49617683Spst * exit values can be propagated from a node's predecessors 49717683Spst * provided it is uniquely defined. 49817683Spst */ 49917683Spststruct valnode { 50017683Spst int code; 50117683Spst int v0, v1; 50217683Spst int val; 50317683Spst struct valnode *next; 50417683Spst}; 50517683Spst 50617683Spst#define MODULUS 213 50717683Spststatic struct valnode *hashtbl[MODULUS]; 50817683Spststatic int curval; 50917683Spststatic int maxval; 51017683Spst 51117683Spst/* Integer constants mapped with the load immediate opcode. */ 51217683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 51317683Spst 51417683Spststruct vmapinfo { 51517683Spst int is_const; 51617683Spst bpf_int32 const_val; 51717683Spst}; 51817683Spst 51917683Spststruct vmapinfo *vmap; 52017683Spststruct valnode *vnode_base; 52117683Spststruct valnode *next_vnode; 52217683Spst 52317683Spststatic void 52417683Spstinit_val() 52517683Spst{ 52617683Spst curval = 0; 52717683Spst next_vnode = vnode_base; 52817683Spst memset((char *)vmap, 0, maxval * sizeof(*vmap)); 52917683Spst memset((char *)hashtbl, 0, sizeof hashtbl); 53017683Spst} 53117683Spst 53217683Spst/* Because we really don't have an IR, this stuff is a little messy. */ 53317683Spststatic int 53417683SpstF(code, v0, v1) 53517683Spst int code; 53617683Spst int v0, v1; 53717683Spst{ 53817683Spst u_int hash; 53917683Spst int val; 54017683Spst struct valnode *p; 54117683Spst 54217683Spst hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 54317683Spst hash %= MODULUS; 54417683Spst 54517683Spst for (p = hashtbl[hash]; p; p = p->next) 54617683Spst if (p->code == code && p->v0 == v0 && p->v1 == v1) 54717683Spst return p->val; 54817683Spst 54917683Spst val = ++curval; 55017683Spst if (BPF_MODE(code) == BPF_IMM && 55117683Spst (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 55217683Spst vmap[val].const_val = v0; 55317683Spst vmap[val].is_const = 1; 55417683Spst } 55517683Spst p = next_vnode++; 55617683Spst p->val = val; 55717683Spst p->code = code; 55817683Spst p->v0 = v0; 55917683Spst p->v1 = v1; 56017683Spst p->next = hashtbl[hash]; 56117683Spst hashtbl[hash] = p; 56217683Spst 56317683Spst return val; 56417683Spst} 56517683Spst 56617683Spststatic inline void 56717683Spstvstore(s, valp, newval, alter) 56817683Spst struct stmt *s; 56917683Spst int *valp; 57017683Spst int newval; 57117683Spst int alter; 57217683Spst{ 57317683Spst if (alter && *valp == newval) 57417683Spst s->code = NOP; 57517683Spst else 57617683Spst *valp = newval; 57717683Spst} 57817683Spst 57917683Spststatic void 58017683Spstfold_op(s, v0, v1) 58117683Spst struct stmt *s; 58217683Spst int v0, v1; 58317683Spst{ 58417683Spst bpf_int32 a, b; 58517683Spst 58617683Spst a = vmap[v0].const_val; 58717683Spst b = vmap[v1].const_val; 58817683Spst 58917683Spst switch (BPF_OP(s->code)) { 59017683Spst case BPF_ADD: 59117683Spst a += b; 59217683Spst break; 59317683Spst 59417683Spst case BPF_SUB: 59517683Spst a -= b; 59617683Spst break; 59717683Spst 59817683Spst case BPF_MUL: 59917683Spst a *= b; 60017683Spst break; 60117683Spst 60217683Spst case BPF_DIV: 60317683Spst if (b == 0) 60417683Spst bpf_error("division by zero"); 60517683Spst a /= b; 60617683Spst break; 60717683Spst 60817683Spst case BPF_AND: 60917683Spst a &= b; 61017683Spst break; 61117683Spst 61217683Spst case BPF_OR: 61317683Spst a |= b; 61417683Spst break; 61517683Spst 61617683Spst case BPF_LSH: 61717683Spst a <<= b; 61817683Spst break; 61917683Spst 62017683Spst case BPF_RSH: 62117683Spst a >>= b; 62217683Spst break; 62317683Spst 62417683Spst case BPF_NEG: 62517683Spst a = -a; 62617683Spst break; 62717683Spst 62817683Spst default: 62917683Spst abort(); 63017683Spst } 63117683Spst s->k = a; 63217683Spst s->code = BPF_LD|BPF_IMM; 63317683Spst done = 0; 63417683Spst} 63517683Spst 63617683Spststatic inline struct slist * 63717683Spstthis_op(s) 63817683Spst struct slist *s; 63917683Spst{ 64017683Spst while (s != 0 && s->s.code == NOP) 64117683Spst s = s->next; 64217683Spst return s; 64317683Spst} 64417683Spst 64517683Spststatic void 64617683Spstopt_not(b) 64717683Spst struct block *b; 64817683Spst{ 64917683Spst struct block *tmp = JT(b); 65017683Spst 65117683Spst JT(b) = JF(b); 65217683Spst JF(b) = tmp; 65317683Spst} 65417683Spst 65517683Spststatic void 65617683Spstopt_peep(b) 65717683Spst struct block *b; 65817683Spst{ 65917683Spst struct slist *s; 66017683Spst struct slist *next, *last; 66117683Spst int val; 66217683Spst 66317683Spst s = b->stmts; 66417683Spst if (s == 0) 66517683Spst return; 66617683Spst 66717683Spst last = s; 66817683Spst while (1) { 66917683Spst s = this_op(s); 67017683Spst if (s == 0) 67117683Spst break; 67217683Spst next = this_op(s->next); 67317683Spst if (next == 0) 67417683Spst break; 67517683Spst last = next; 67617683Spst 67717683Spst /* 67817683Spst * st M[k] --> st M[k] 67917683Spst * ldx M[k] tax 68017683Spst */ 68117683Spst if (s->s.code == BPF_ST && 68217683Spst next->s.code == (BPF_LDX|BPF_MEM) && 68317683Spst s->s.k == next->s.k) { 68417683Spst done = 0; 68517683Spst next->s.code = BPF_MISC|BPF_TAX; 68617683Spst } 68717683Spst /* 68817683Spst * ld #k --> ldx #k 68917683Spst * tax txa 69017683Spst */ 69117683Spst if (s->s.code == (BPF_LD|BPF_IMM) && 69217683Spst next->s.code == (BPF_MISC|BPF_TAX)) { 69317683Spst s->s.code = BPF_LDX|BPF_IMM; 69417683Spst next->s.code = BPF_MISC|BPF_TXA; 69517683Spst done = 0; 69617683Spst } 69717683Spst /* 69817683Spst * This is an ugly special case, but it happens 69917683Spst * when you say tcp[k] or udp[k] where k is a constant. 70017683Spst */ 70117683Spst if (s->s.code == (BPF_LD|BPF_IMM)) { 70217683Spst struct slist *add, *tax, *ild; 70317683Spst 70417683Spst /* 70517683Spst * Check that X isn't used on exit from this 70617683Spst * block (which the optimizer might cause). 70717683Spst * We know the code generator won't generate 70817683Spst * any local dependencies. 70917683Spst */ 71017683Spst if (ATOMELEM(b->out_use, X_ATOM)) 71117683Spst break; 71217683Spst 71317683Spst if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 71417683Spst add = next; 71517683Spst else 71617683Spst add = this_op(next->next); 71717683Spst if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 71817683Spst break; 71917683Spst 72017683Spst tax = this_op(add->next); 72117683Spst if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 72217683Spst break; 72317683Spst 72417683Spst ild = this_op(tax->next); 72517683Spst if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 72617683Spst BPF_MODE(ild->s.code) != BPF_IND) 72717683Spst break; 72817683Spst /* 72917683Spst * XXX We need to check that X is not 73017683Spst * subsequently used. We know we can eliminate the 73117683Spst * accumulator modifications since it is defined 73217683Spst * by the last stmt of this sequence. 73317683Spst * 73417683Spst * We want to turn this sequence: 73517683Spst * 73617683Spst * (004) ldi #0x2 {s} 73717683Spst * (005) ldxms [14] {next} -- optional 73817683Spst * (006) addx {add} 73917683Spst * (007) tax {tax} 74017683Spst * (008) ild [x+0] {ild} 74117683Spst * 74217683Spst * into this sequence: 74317683Spst * 74417683Spst * (004) nop 74517683Spst * (005) ldxms [14] 74617683Spst * (006) nop 74717683Spst * (007) nop 74817683Spst * (008) ild [x+2] 74917683Spst * 75017683Spst */ 75117683Spst ild->s.k += s->s.k; 75217683Spst s->s.code = NOP; 75317683Spst add->s.code = NOP; 75417683Spst tax->s.code = NOP; 75517683Spst done = 0; 75617683Spst } 75717683Spst s = next; 75817683Spst } 75917683Spst /* 76017683Spst * If we have a subtract to do a comparison, and the X register 76117683Spst * is a known constant, we can merge this value into the 76217683Spst * comparison. 76317683Spst */ 76417683Spst if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) && 76517683Spst !ATOMELEM(b->out_use, A_ATOM)) { 76617683Spst val = b->val[X_ATOM]; 76717683Spst if (vmap[val].is_const) { 76817683Spst int op; 76917683Spst 77017683Spst b->s.k += vmap[val].const_val; 77117683Spst op = BPF_OP(b->s.code); 77217683Spst if (op == BPF_JGT || op == BPF_JGE) { 77317683Spst struct block *t = JT(b); 77417683Spst JT(b) = JF(b); 77517683Spst JF(b) = t; 77617683Spst b->s.k += 0x80000000; 77717683Spst } 77817683Spst last->s.code = NOP; 77917683Spst done = 0; 78017683Spst } else if (b->s.k == 0) { 78117683Spst /* 78217683Spst * sub x -> nop 78317683Spst * j #0 j x 78417683Spst */ 78517683Spst last->s.code = NOP; 78617683Spst b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) | 78717683Spst BPF_X; 78817683Spst done = 0; 78917683Spst } 79017683Spst } 79117683Spst /* 79217683Spst * Likewise, a constant subtract can be simplified. 79317683Spst */ 79417683Spst else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) && 79517683Spst !ATOMELEM(b->out_use, A_ATOM)) { 79617683Spst int op; 79717683Spst 79817683Spst b->s.k += last->s.k; 79917683Spst last->s.code = NOP; 80017683Spst op = BPF_OP(b->s.code); 80117683Spst if (op == BPF_JGT || op == BPF_JGE) { 80217683Spst struct block *t = JT(b); 80317683Spst JT(b) = JF(b); 80417683Spst JF(b) = t; 80517683Spst b->s.k += 0x80000000; 80617683Spst } 80717683Spst done = 0; 80817683Spst } 80917683Spst /* 81017683Spst * and #k nop 81117683Spst * jeq #0 -> jset #k 81217683Spst */ 81317683Spst if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 81417683Spst !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) { 81517683Spst b->s.k = last->s.k; 81617683Spst b->s.code = BPF_JMP|BPF_K|BPF_JSET; 81717683Spst last->s.code = NOP; 81817683Spst done = 0; 81917683Spst opt_not(b); 82017683Spst } 82117683Spst /* 82217683Spst * If the accumulator is a known constant, we can compute the 82317683Spst * comparison result. 82417683Spst */ 82517683Spst val = b->val[A_ATOM]; 82617683Spst if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 82717683Spst bpf_int32 v = vmap[val].const_val; 82817683Spst switch (BPF_OP(b->s.code)) { 82917683Spst 83017683Spst case BPF_JEQ: 83117683Spst v = v == b->s.k; 83217683Spst break; 83317683Spst 83417683Spst case BPF_JGT: 83517683Spst v = (unsigned)v > b->s.k; 83617683Spst break; 83717683Spst 83817683Spst case BPF_JGE: 83917683Spst v = (unsigned)v >= b->s.k; 84017683Spst break; 84117683Spst 84217683Spst case BPF_JSET: 84317683Spst v &= b->s.k; 84417683Spst break; 84517683Spst 84617683Spst default: 84717683Spst abort(); 84817683Spst } 84917683Spst if (JF(b) != JT(b)) 85017683Spst done = 0; 85117683Spst if (v) 85217683Spst JF(b) = JT(b); 85317683Spst else 85417683Spst JT(b) = JF(b); 85517683Spst } 85617683Spst} 85717683Spst 85817683Spst/* 85917683Spst * Compute the symbolic value of expression of 's', and update 86017683Spst * anything it defines in the value table 'val'. If 'alter' is true, 86117683Spst * do various optimizations. This code would be cleaner if symbolic 86217683Spst * evaluation and code transformations weren't folded together. 86317683Spst */ 86417683Spststatic void 86517683Spstopt_stmt(s, val, alter) 86617683Spst struct stmt *s; 86717683Spst int val[]; 86817683Spst int alter; 86917683Spst{ 87017683Spst int op; 87117683Spst int v; 87217683Spst 87317683Spst switch (s->code) { 87417683Spst 87517683Spst case BPF_LD|BPF_ABS|BPF_W: 87617683Spst case BPF_LD|BPF_ABS|BPF_H: 87717683Spst case BPF_LD|BPF_ABS|BPF_B: 87817683Spst v = F(s->code, s->k, 0L); 87917683Spst vstore(s, &val[A_ATOM], v, alter); 88017683Spst break; 88117683Spst 88217683Spst case BPF_LD|BPF_IND|BPF_W: 88317683Spst case BPF_LD|BPF_IND|BPF_H: 88417683Spst case BPF_LD|BPF_IND|BPF_B: 88517683Spst v = val[X_ATOM]; 88617683Spst if (alter && vmap[v].is_const) { 88717683Spst s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 88817683Spst s->k += vmap[v].const_val; 88917683Spst v = F(s->code, s->k, 0L); 89017683Spst done = 0; 89117683Spst } 89217683Spst else 89317683Spst v = F(s->code, s->k, v); 89417683Spst vstore(s, &val[A_ATOM], v, alter); 89517683Spst break; 89617683Spst 89717683Spst case BPF_LD|BPF_LEN: 89817683Spst v = F(s->code, 0L, 0L); 89917683Spst vstore(s, &val[A_ATOM], v, alter); 90017683Spst break; 90117683Spst 90217683Spst case BPF_LD|BPF_IMM: 90317683Spst v = K(s->k); 90417683Spst vstore(s, &val[A_ATOM], v, alter); 90517683Spst break; 90617683Spst 90717683Spst case BPF_LDX|BPF_IMM: 90817683Spst v = K(s->k); 90917683Spst vstore(s, &val[X_ATOM], v, alter); 91017683Spst break; 91117683Spst 91217683Spst case BPF_LDX|BPF_MSH|BPF_B: 91317683Spst v = F(s->code, s->k, 0L); 91417683Spst vstore(s, &val[X_ATOM], v, alter); 91517683Spst break; 91617683Spst 91717683Spst case BPF_ALU|BPF_NEG: 91817683Spst if (alter && vmap[val[A_ATOM]].is_const) { 91917683Spst s->code = BPF_LD|BPF_IMM; 92017683Spst s->k = -vmap[val[A_ATOM]].const_val; 92117683Spst val[A_ATOM] = K(s->k); 92217683Spst } 92317683Spst else 92417683Spst val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 92517683Spst break; 92617683Spst 92717683Spst case BPF_ALU|BPF_ADD|BPF_K: 92817683Spst case BPF_ALU|BPF_SUB|BPF_K: 92917683Spst case BPF_ALU|BPF_MUL|BPF_K: 93017683Spst case BPF_ALU|BPF_DIV|BPF_K: 93117683Spst case BPF_ALU|BPF_AND|BPF_K: 93217683Spst case BPF_ALU|BPF_OR|BPF_K: 93317683Spst case BPF_ALU|BPF_LSH|BPF_K: 93417683Spst case BPF_ALU|BPF_RSH|BPF_K: 93517683Spst op = BPF_OP(s->code); 93617683Spst if (alter) { 93717683Spst if (s->k == 0) { 93817683Spst if (op == BPF_ADD || op == BPF_SUB || 93917683Spst op == BPF_LSH || op == BPF_RSH || 94017683Spst op == BPF_OR) { 94117683Spst s->code = NOP; 94217683Spst break; 94317683Spst } 94417683Spst if (op == BPF_MUL || op == BPF_AND) { 94517683Spst s->code = BPF_LD|BPF_IMM; 94617683Spst val[A_ATOM] = K(s->k); 94717683Spst break; 94817683Spst } 94917683Spst } 95017683Spst if (vmap[val[A_ATOM]].is_const) { 95117683Spst fold_op(s, val[A_ATOM], K(s->k)); 95217683Spst val[A_ATOM] = K(s->k); 95317683Spst break; 95417683Spst } 95517683Spst } 95617683Spst val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 95717683Spst break; 95817683Spst 95917683Spst case BPF_ALU|BPF_ADD|BPF_X: 96017683Spst case BPF_ALU|BPF_SUB|BPF_X: 96117683Spst case BPF_ALU|BPF_MUL|BPF_X: 96217683Spst case BPF_ALU|BPF_DIV|BPF_X: 96317683Spst case BPF_ALU|BPF_AND|BPF_X: 96417683Spst case BPF_ALU|BPF_OR|BPF_X: 96517683Spst case BPF_ALU|BPF_LSH|BPF_X: 96617683Spst case BPF_ALU|BPF_RSH|BPF_X: 96717683Spst op = BPF_OP(s->code); 96817683Spst if (alter && vmap[val[X_ATOM]].is_const) { 96917683Spst if (vmap[val[A_ATOM]].is_const) { 97017683Spst fold_op(s, val[A_ATOM], val[X_ATOM]); 97117683Spst val[A_ATOM] = K(s->k); 97217683Spst } 97317683Spst else { 97417683Spst s->code = BPF_ALU|BPF_K|op; 97517683Spst s->k = vmap[val[X_ATOM]].const_val; 97617683Spst done = 0; 97717683Spst val[A_ATOM] = 97817683Spst F(s->code, val[A_ATOM], K(s->k)); 97917683Spst } 98017683Spst break; 98117683Spst } 98217683Spst /* 98317683Spst * Check if we're doing something to an accumulator 98417683Spst * that is 0, and simplify. This may not seem like 98517683Spst * much of a simplification but it could open up further 98617683Spst * optimizations. 98717683Spst * XXX We could also check for mul by 1, and -1, etc. 98817683Spst */ 98917683Spst if (alter && vmap[val[A_ATOM]].is_const 99017683Spst && vmap[val[A_ATOM]].const_val == 0) { 99117683Spst if (op == BPF_ADD || op == BPF_OR || 99217683Spst op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) { 99317683Spst s->code = BPF_MISC|BPF_TXA; 99417683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 99517683Spst break; 99617683Spst } 99717683Spst else if (op == BPF_MUL || op == BPF_DIV || 99817683Spst op == BPF_AND) { 99917683Spst s->code = BPF_LD|BPF_IMM; 100017683Spst s->k = 0; 100117683Spst vstore(s, &val[A_ATOM], K(s->k), alter); 100217683Spst break; 100317683Spst } 100417683Spst else if (op == BPF_NEG) { 100517683Spst s->code = NOP; 100617683Spst break; 100717683Spst } 100817683Spst } 100917683Spst val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 101017683Spst break; 101117683Spst 101217683Spst case BPF_MISC|BPF_TXA: 101317683Spst vstore(s, &val[A_ATOM], val[X_ATOM], alter); 101417683Spst break; 101517683Spst 101617683Spst case BPF_LD|BPF_MEM: 101717683Spst v = val[s->k]; 101817683Spst if (alter && vmap[v].is_const) { 101917683Spst s->code = BPF_LD|BPF_IMM; 102017683Spst s->k = vmap[v].const_val; 102117683Spst done = 0; 102217683Spst } 102317683Spst vstore(s, &val[A_ATOM], v, alter); 102417683Spst break; 102517683Spst 102617683Spst case BPF_MISC|BPF_TAX: 102717683Spst vstore(s, &val[X_ATOM], val[A_ATOM], alter); 102817683Spst break; 102917683Spst 103017683Spst case BPF_LDX|BPF_MEM: 103117683Spst v = val[s->k]; 103217683Spst if (alter && vmap[v].is_const) { 103317683Spst s->code = BPF_LDX|BPF_IMM; 103417683Spst s->k = vmap[v].const_val; 103517683Spst done = 0; 103617683Spst } 103717683Spst vstore(s, &val[X_ATOM], v, alter); 103817683Spst break; 103917683Spst 104017683Spst case BPF_ST: 104117683Spst vstore(s, &val[s->k], val[A_ATOM], alter); 104217683Spst break; 104317683Spst 104417683Spst case BPF_STX: 104517683Spst vstore(s, &val[s->k], val[X_ATOM], alter); 104617683Spst break; 104717683Spst } 104817683Spst} 104917683Spst 105017683Spststatic void 105117683Spstdeadstmt(s, last) 105217683Spst register struct stmt *s; 105317683Spst register struct stmt *last[]; 105417683Spst{ 105517683Spst register int atom; 105617683Spst 105717683Spst atom = atomuse(s); 105817683Spst if (atom >= 0) { 105917683Spst if (atom == AX_ATOM) { 106017683Spst last[X_ATOM] = 0; 106117683Spst last[A_ATOM] = 0; 106217683Spst } 106317683Spst else 106417683Spst last[atom] = 0; 106517683Spst } 106617683Spst atom = atomdef(s); 106717683Spst if (atom >= 0) { 106817683Spst if (last[atom]) { 106917683Spst done = 0; 107017683Spst last[atom]->code = NOP; 107117683Spst } 107217683Spst last[atom] = s; 107317683Spst } 107417683Spst} 107517683Spst 107617683Spststatic void 107717683Spstopt_deadstores(b) 107817683Spst register struct block *b; 107917683Spst{ 108017683Spst register struct slist *s; 108117683Spst register int atom; 108217683Spst struct stmt *last[N_ATOMS]; 108317683Spst 108417683Spst memset((char *)last, 0, sizeof last); 108517683Spst 108617683Spst for (s = b->stmts; s != 0; s = s->next) 108717683Spst deadstmt(&s->s, last); 108817683Spst deadstmt(&b->s, last); 108917683Spst 109017683Spst for (atom = 0; atom < N_ATOMS; ++atom) 109117683Spst if (last[atom] && !ATOMELEM(b->out_use, atom)) { 109217683Spst last[atom]->code = NOP; 109317683Spst done = 0; 109417683Spst } 109517683Spst} 109617683Spst 109717683Spststatic void 109817683Spstopt_blk(b, do_stmts) 109917683Spst struct block *b; 110017683Spst int do_stmts; 110117683Spst{ 110217683Spst struct slist *s; 110317683Spst struct edge *p; 110417683Spst int i; 110517683Spst bpf_int32 aval; 110617683Spst 110756889Sfenner#if 0 110856889Sfenner for (s = b->stmts; s && s->next; s = s->next) 110956889Sfenner if (BPF_CLASS(s->s.code) == BPF_JMP) { 111056889Sfenner do_stmts = 0; 111156889Sfenner break; 111256889Sfenner } 111356889Sfenner#endif 111456889Sfenner 111517683Spst /* 111617683Spst * Initialize the atom values. 111717683Spst * If we have no predecessors, everything is undefined. 111817683Spst * Otherwise, we inherent our values from our predecessors. 111917683Spst * If any register has an ambiguous value (i.e. control paths are 112017683Spst * merging) give it the undefined value of 0. 112117683Spst */ 112217683Spst p = b->in_edges; 112317683Spst if (p == 0) 112417683Spst memset((char *)b->val, 0, sizeof(b->val)); 112517683Spst else { 112617683Spst memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 112717683Spst while ((p = p->next) != NULL) { 112817683Spst for (i = 0; i < N_ATOMS; ++i) 112917683Spst if (b->val[i] != p->pred->val[i]) 113017683Spst b->val[i] = 0; 113117683Spst } 113217683Spst } 113317683Spst aval = b->val[A_ATOM]; 113417683Spst for (s = b->stmts; s; s = s->next) 113517683Spst opt_stmt(&s->s, b->val, do_stmts); 113617683Spst 113717683Spst /* 113817683Spst * This is a special case: if we don't use anything from this 113917683Spst * block, and we load the accumulator with value that is 114017683Spst * already there, or if this block is a return, 114117683Spst * eliminate all the statements. 114217683Spst */ 114317683Spst if (do_stmts && 114417683Spst ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) || 114517683Spst BPF_CLASS(b->s.code) == BPF_RET)) { 114617683Spst if (b->stmts != 0) { 114717683Spst b->stmts = 0; 114817683Spst done = 0; 114917683Spst } 115017683Spst } else { 115117683Spst opt_peep(b); 115217683Spst opt_deadstores(b); 115317683Spst } 115417683Spst /* 115517683Spst * Set up values for branch optimizer. 115617683Spst */ 115717683Spst if (BPF_SRC(b->s.code) == BPF_K) 115817683Spst b->oval = K(b->s.k); 115917683Spst else 116017683Spst b->oval = b->val[X_ATOM]; 116117683Spst b->et.code = b->s.code; 116217683Spst b->ef.code = -b->s.code; 116317683Spst} 116417683Spst 116517683Spst/* 116617683Spst * Return true if any register that is used on exit from 'succ', has 116717683Spst * an exit value that is different from the corresponding exit value 116817683Spst * from 'b'. 116917683Spst */ 117017683Spststatic int 117117683Spstuse_conflict(b, succ) 117217683Spst struct block *b, *succ; 117317683Spst{ 117417683Spst int atom; 117517683Spst atomset use = succ->out_use; 117617683Spst 117717683Spst if (use == 0) 117817683Spst return 0; 117917683Spst 118017683Spst for (atom = 0; atom < N_ATOMS; ++atom) 118117683Spst if (ATOMELEM(use, atom)) 118217683Spst if (b->val[atom] != succ->val[atom]) 118317683Spst return 1; 118417683Spst return 0; 118517683Spst} 118617683Spst 118717683Spststatic struct block * 118817683Spstfold_edge(child, ep) 118917683Spst struct block *child; 119017683Spst struct edge *ep; 119117683Spst{ 119217683Spst int sense; 119317683Spst int aval0, aval1, oval0, oval1; 119417683Spst int code = ep->code; 119517683Spst 119617683Spst if (code < 0) { 119717683Spst code = -code; 119817683Spst sense = 0; 119917683Spst } else 120017683Spst sense = 1; 120117683Spst 120217683Spst if (child->s.code != code) 120317683Spst return 0; 120417683Spst 120517683Spst aval0 = child->val[A_ATOM]; 120617683Spst oval0 = child->oval; 120717683Spst aval1 = ep->pred->val[A_ATOM]; 120817683Spst oval1 = ep->pred->oval; 120917683Spst 121017683Spst if (aval0 != aval1) 121117683Spst return 0; 121217683Spst 121317683Spst if (oval0 == oval1) 121417683Spst /* 121517683Spst * The operands are identical, so the 121617683Spst * result is true if a true branch was 121717683Spst * taken to get here, otherwise false. 121817683Spst */ 121917683Spst return sense ? JT(child) : JF(child); 122017683Spst 122117683Spst if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 122217683Spst /* 122317683Spst * At this point, we only know the comparison if we 122417683Spst * came down the true branch, and it was an equality 122517683Spst * comparison with a constant. We rely on the fact that 122617683Spst * distinct constants have distinct value numbers. 122717683Spst */ 122817683Spst return JF(child); 122917683Spst 123017683Spst return 0; 123117683Spst} 123217683Spst 123317683Spststatic void 123417683Spstopt_j(ep) 123517683Spst struct edge *ep; 123617683Spst{ 123717683Spst register int i, k; 123817683Spst register struct block *target; 123917683Spst 124017683Spst if (JT(ep->succ) == 0) 124117683Spst return; 124217683Spst 124317683Spst if (JT(ep->succ) == JF(ep->succ)) { 124417683Spst /* 124517683Spst * Common branch targets can be eliminated, provided 124617683Spst * there is no data dependency. 124717683Spst */ 124817683Spst if (!use_conflict(ep->pred, ep->succ->et.succ)) { 124917683Spst done = 0; 125017683Spst ep->succ = JT(ep->succ); 125117683Spst } 125217683Spst } 125317683Spst /* 125417683Spst * For each edge dominator that matches the successor of this 125517683Spst * edge, promote the edge successor to the its grandchild. 125617683Spst * 125717683Spst * XXX We violate the set abstraction here in favor a reasonably 125817683Spst * efficient loop. 125917683Spst */ 126017683Spst top: 126117683Spst for (i = 0; i < edgewords; ++i) { 126217683Spst register bpf_u_int32 x = ep->edom[i]; 126317683Spst 126417683Spst while (x != 0) { 126517683Spst k = ffs(x) - 1; 126617683Spst x &=~ (1 << k); 126717683Spst k += i * BITS_PER_WORD; 126817683Spst 126917683Spst target = fold_edge(ep->succ, edges[k]); 127017683Spst /* 127117683Spst * Check that there is no data dependency between 127217683Spst * nodes that will be violated if we move the edge. 127317683Spst */ 127417683Spst if (target != 0 && !use_conflict(ep->pred, target)) { 127517683Spst done = 0; 127617683Spst ep->succ = target; 127717683Spst if (JT(target) != 0) 127817683Spst /* 127917683Spst * Start over unless we hit a leaf. 128017683Spst */ 128117683Spst goto top; 128217683Spst return; 128317683Spst } 128417683Spst } 128517683Spst } 128617683Spst} 128717683Spst 128817683Spst 128917683Spststatic void 129017683Spstor_pullup(b) 129117683Spst struct block *b; 129217683Spst{ 129317683Spst int val, at_top; 129417683Spst struct block *pull; 129517683Spst struct block **diffp, **samep; 129617683Spst struct edge *ep; 129717683Spst 129817683Spst ep = b->in_edges; 129917683Spst if (ep == 0) 130017683Spst return; 130117683Spst 130217683Spst /* 130317683Spst * Make sure each predecessor loads the same value. 130417683Spst * XXX why? 130517683Spst */ 130617683Spst val = ep->pred->val[A_ATOM]; 130717683Spst for (ep = ep->next; ep != 0; ep = ep->next) 130817683Spst if (val != ep->pred->val[A_ATOM]) 130917683Spst return; 131017683Spst 131117683Spst if (JT(b->in_edges->pred) == b) 131217683Spst diffp = &JT(b->in_edges->pred); 131317683Spst else 131417683Spst diffp = &JF(b->in_edges->pred); 131517683Spst 131617683Spst at_top = 1; 131717683Spst while (1) { 131817683Spst if (*diffp == 0) 131917683Spst return; 132017683Spst 132117683Spst if (JT(*diffp) != JT(b)) 132217683Spst return; 132317683Spst 132417683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 132517683Spst return; 132617683Spst 132717683Spst if ((*diffp)->val[A_ATOM] != val) 132817683Spst break; 132917683Spst 133017683Spst diffp = &JF(*diffp); 133117683Spst at_top = 0; 133217683Spst } 133317683Spst samep = &JF(*diffp); 133417683Spst while (1) { 133517683Spst if (*samep == 0) 133617683Spst return; 133717683Spst 133817683Spst if (JT(*samep) != JT(b)) 133917683Spst return; 134017683Spst 134117683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 134217683Spst return; 134317683Spst 134417683Spst if ((*samep)->val[A_ATOM] == val) 134517683Spst break; 134617683Spst 134717683Spst /* XXX Need to check that there are no data dependencies 134817683Spst between dp0 and dp1. Currently, the code generator 134917683Spst will not produce such dependencies. */ 135017683Spst samep = &JF(*samep); 135117683Spst } 135217683Spst#ifdef notdef 135317683Spst /* XXX This doesn't cover everything. */ 135417683Spst for (i = 0; i < N_ATOMS; ++i) 135517683Spst if ((*samep)->val[i] != pred->val[i]) 135617683Spst return; 135717683Spst#endif 135817683Spst /* Pull up the node. */ 135917683Spst pull = *samep; 136017683Spst *samep = JF(pull); 136117683Spst JF(pull) = *diffp; 136217683Spst 136317683Spst /* 136417683Spst * At the top of the chain, each predecessor needs to point at the 136517683Spst * pulled up node. Inside the chain, there is only one predecessor 136617683Spst * to worry about. 136717683Spst */ 136817683Spst if (at_top) { 136917683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 137017683Spst if (JT(ep->pred) == b) 137117683Spst JT(ep->pred) = pull; 137217683Spst else 137317683Spst JF(ep->pred) = pull; 137417683Spst } 137517683Spst } 137617683Spst else 137717683Spst *diffp = pull; 137817683Spst 137917683Spst done = 0; 138017683Spst} 138117683Spst 138217683Spststatic void 138317683Spstand_pullup(b) 138417683Spst struct block *b; 138517683Spst{ 138617683Spst int val, at_top; 138717683Spst struct block *pull; 138817683Spst struct block **diffp, **samep; 138917683Spst struct edge *ep; 139017683Spst 139117683Spst ep = b->in_edges; 139217683Spst if (ep == 0) 139317683Spst return; 139417683Spst 139517683Spst /* 139617683Spst * Make sure each predecessor loads the same value. 139717683Spst */ 139817683Spst val = ep->pred->val[A_ATOM]; 139917683Spst for (ep = ep->next; ep != 0; ep = ep->next) 140017683Spst if (val != ep->pred->val[A_ATOM]) 140117683Spst return; 140217683Spst 140317683Spst if (JT(b->in_edges->pred) == b) 140417683Spst diffp = &JT(b->in_edges->pred); 140517683Spst else 140617683Spst diffp = &JF(b->in_edges->pred); 140717683Spst 140817683Spst at_top = 1; 140917683Spst while (1) { 141017683Spst if (*diffp == 0) 141117683Spst return; 141217683Spst 141317683Spst if (JF(*diffp) != JF(b)) 141417683Spst return; 141517683Spst 141617683Spst if (!SET_MEMBER((*diffp)->dom, b->id)) 141717683Spst return; 141817683Spst 141917683Spst if ((*diffp)->val[A_ATOM] != val) 142017683Spst break; 142117683Spst 142217683Spst diffp = &JT(*diffp); 142317683Spst at_top = 0; 142417683Spst } 142517683Spst samep = &JT(*diffp); 142617683Spst while (1) { 142717683Spst if (*samep == 0) 142817683Spst return; 142917683Spst 143017683Spst if (JF(*samep) != JF(b)) 143117683Spst return; 143217683Spst 143317683Spst if (!SET_MEMBER((*samep)->dom, b->id)) 143417683Spst return; 143517683Spst 143617683Spst if ((*samep)->val[A_ATOM] == val) 143717683Spst break; 143817683Spst 143917683Spst /* XXX Need to check that there are no data dependencies 144017683Spst between diffp and samep. Currently, the code generator 144117683Spst will not produce such dependencies. */ 144217683Spst samep = &JT(*samep); 144317683Spst } 144417683Spst#ifdef notdef 144517683Spst /* XXX This doesn't cover everything. */ 144617683Spst for (i = 0; i < N_ATOMS; ++i) 144717683Spst if ((*samep)->val[i] != pred->val[i]) 144817683Spst return; 144917683Spst#endif 145017683Spst /* Pull up the node. */ 145117683Spst pull = *samep; 145217683Spst *samep = JT(pull); 145317683Spst JT(pull) = *diffp; 145417683Spst 145517683Spst /* 145617683Spst * At the top of the chain, each predecessor needs to point at the 145717683Spst * pulled up node. Inside the chain, there is only one predecessor 145817683Spst * to worry about. 145917683Spst */ 146017683Spst if (at_top) { 146117683Spst for (ep = b->in_edges; ep != 0; ep = ep->next) { 146217683Spst if (JT(ep->pred) == b) 146317683Spst JT(ep->pred) = pull; 146417683Spst else 146517683Spst JF(ep->pred) = pull; 146617683Spst } 146717683Spst } 146817683Spst else 146917683Spst *diffp = pull; 147017683Spst 147117683Spst done = 0; 147217683Spst} 147317683Spst 147417683Spststatic void 147517683Spstopt_blks(root, do_stmts) 147617683Spst struct block *root; 147717683Spst int do_stmts; 147817683Spst{ 147917683Spst int i, maxlevel; 148017683Spst struct block *p; 148117683Spst 148217683Spst init_val(); 148317683Spst maxlevel = root->level; 148417683Spst for (i = maxlevel; i >= 0; --i) 148517683Spst for (p = levels[i]; p; p = p->link) 148617683Spst opt_blk(p, do_stmts); 148717683Spst 148817683Spst if (do_stmts) 148917683Spst /* 149017683Spst * No point trying to move branches; it can't possibly 149117683Spst * make a difference at this point. 149217683Spst */ 149317683Spst return; 149417683Spst 149517683Spst for (i = 1; i <= maxlevel; ++i) { 149617683Spst for (p = levels[i]; p; p = p->link) { 149717683Spst opt_j(&p->et); 149817683Spst opt_j(&p->ef); 149917683Spst } 150017683Spst } 150117683Spst for (i = 1; i <= maxlevel; ++i) { 150217683Spst for (p = levels[i]; p; p = p->link) { 150317683Spst or_pullup(p); 150417683Spst and_pullup(p); 150517683Spst } 150617683Spst } 150717683Spst} 150817683Spst 150917683Spststatic inline void 151017683Spstlink_inedge(parent, child) 151117683Spst struct edge *parent; 151217683Spst struct block *child; 151317683Spst{ 151417683Spst parent->next = child->in_edges; 151517683Spst child->in_edges = parent; 151617683Spst} 151717683Spst 151817683Spststatic void 151917683Spstfind_inedges(root) 152017683Spst struct block *root; 152117683Spst{ 152217683Spst int i; 152317683Spst struct block *b; 152417683Spst 152517683Spst for (i = 0; i < n_blocks; ++i) 152617683Spst blocks[i]->in_edges = 0; 152717683Spst 152817683Spst /* 152917683Spst * Traverse the graph, adding each edge to the predecessor 153017683Spst * list of its successors. Skip the leaves (i.e. level 0). 153117683Spst */ 153217683Spst for (i = root->level; i > 0; --i) { 153317683Spst for (b = levels[i]; b != 0; b = b->link) { 153417683Spst link_inedge(&b->et, JT(b)); 153517683Spst link_inedge(&b->ef, JF(b)); 153617683Spst } 153717683Spst } 153817683Spst} 153917683Spst 154017683Spststatic void 154117683Spstopt_root(b) 154217683Spst struct block **b; 154317683Spst{ 154417683Spst struct slist *tmp, *s; 154517683Spst 154617683Spst s = (*b)->stmts; 154717683Spst (*b)->stmts = 0; 154817683Spst while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 154917683Spst *b = JT(*b); 155017683Spst 155117683Spst tmp = (*b)->stmts; 155217683Spst if (tmp != 0) 155317683Spst sappend(s, tmp); 155417683Spst (*b)->stmts = s; 155517683Spst 155617683Spst /* 155717683Spst * If the root node is a return, then there is no 155817683Spst * point executing any statements (since the bpf machine 155917683Spst * has no side effects). 156017683Spst */ 156117683Spst if (BPF_CLASS((*b)->s.code) == BPF_RET) 156217683Spst (*b)->stmts = 0; 156317683Spst} 156417683Spst 156517683Spststatic void 156617683Spstopt_loop(root, do_stmts) 156717683Spst struct block *root; 156817683Spst int do_stmts; 156917683Spst{ 157017683Spst 157117683Spst#ifdef BDEBUG 157217683Spst if (dflag > 1) 157317683Spst opt_dump(root); 157417683Spst#endif 157517683Spst do { 157617683Spst done = 1; 157717683Spst find_levels(root); 157817683Spst find_dom(root); 157917683Spst find_closure(root); 158017683Spst find_inedges(root); 158117683Spst find_ud(root); 158217683Spst find_edom(root); 158317683Spst opt_blks(root, do_stmts); 158417683Spst#ifdef BDEBUG 158517683Spst if (dflag > 1) 158617683Spst opt_dump(root); 158717683Spst#endif 158817683Spst } while (!done); 158917683Spst} 159017683Spst 159117683Spst/* 159217683Spst * Optimize the filter code in its dag representation. 159317683Spst */ 159417683Spstvoid 159517683Spstbpf_optimize(rootp) 159617683Spst struct block **rootp; 159717683Spst{ 159817683Spst struct block *root; 159917683Spst 160017683Spst root = *rootp; 160117683Spst 160217683Spst opt_init(root); 160317683Spst opt_loop(root, 0); 160417683Spst opt_loop(root, 1); 160517683Spst intern_blocks(root); 160617683Spst opt_root(rootp); 160717683Spst opt_cleanup(); 160817683Spst} 160917683Spst 161017683Spststatic void 161117683Spstmake_marks(p) 161217683Spst struct block *p; 161317683Spst{ 161417683Spst if (!isMarked(p)) { 161517683Spst Mark(p); 161617683Spst if (BPF_CLASS(p->s.code) != BPF_RET) { 161717683Spst make_marks(JT(p)); 161817683Spst make_marks(JF(p)); 161917683Spst } 162017683Spst } 162117683Spst} 162217683Spst 162317683Spst/* 162417683Spst * Mark code array such that isMarked(i) is true 162517683Spst * only for nodes that are alive. 162617683Spst */ 162717683Spststatic void 162817683Spstmark_code(p) 162917683Spst struct block *p; 163017683Spst{ 163117683Spst cur_mark += 1; 163217683Spst make_marks(p); 163317683Spst} 163417683Spst 163517683Spst/* 163617683Spst * True iff the two stmt lists load the same value from the packet into 163717683Spst * the accumulator. 163817683Spst */ 163917683Spststatic int 164017683Spsteq_slist(x, y) 164117683Spst struct slist *x, *y; 164217683Spst{ 164317683Spst while (1) { 164417683Spst while (x && x->s.code == NOP) 164517683Spst x = x->next; 164617683Spst while (y && y->s.code == NOP) 164717683Spst y = y->next; 164817683Spst if (x == 0) 164917683Spst return y == 0; 165017683Spst if (y == 0) 165117683Spst return x == 0; 165217683Spst if (x->s.code != y->s.code || x->s.k != y->s.k) 165317683Spst return 0; 165417683Spst x = x->next; 165517683Spst y = y->next; 165617683Spst } 165717683Spst} 165817683Spst 165917683Spststatic inline int 166017683Spsteq_blk(b0, b1) 166117683Spst struct block *b0, *b1; 166217683Spst{ 166317683Spst if (b0->s.code == b1->s.code && 166417683Spst b0->s.k == b1->s.k && 166517683Spst b0->et.succ == b1->et.succ && 166617683Spst b0->ef.succ == b1->ef.succ) 166717683Spst return eq_slist(b0->stmts, b1->stmts); 166817683Spst return 0; 166917683Spst} 167017683Spst 167117683Spststatic void 167217683Spstintern_blocks(root) 167317683Spst struct block *root; 167417683Spst{ 167517683Spst struct block *p; 167617683Spst int i, j; 167717683Spst int done; 167817683Spst top: 167917683Spst done = 1; 168017683Spst for (i = 0; i < n_blocks; ++i) 168117683Spst blocks[i]->link = 0; 168217683Spst 168317683Spst mark_code(root); 168417683Spst 168517683Spst for (i = n_blocks - 1; --i >= 0; ) { 168617683Spst if (!isMarked(blocks[i])) 168717683Spst continue; 168817683Spst for (j = i + 1; j < n_blocks; ++j) { 168917683Spst if (!isMarked(blocks[j])) 169017683Spst continue; 169117683Spst if (eq_blk(blocks[i], blocks[j])) { 169217683Spst blocks[i]->link = blocks[j]->link ? 169317683Spst blocks[j]->link : blocks[j]; 169417683Spst break; 169517683Spst } 169617683Spst } 169717683Spst } 169817683Spst for (i = 0; i < n_blocks; ++i) { 169917683Spst p = blocks[i]; 170017683Spst if (JT(p) == 0) 170117683Spst continue; 170217683Spst if (JT(p)->link) { 170317683Spst done = 0; 170417683Spst JT(p) = JT(p)->link; 170517683Spst } 170617683Spst if (JF(p)->link) { 170717683Spst done = 0; 170817683Spst JF(p) = JF(p)->link; 170917683Spst } 171017683Spst } 171117683Spst if (!done) 171217683Spst goto top; 171317683Spst} 171417683Spst 171517683Spststatic void 171617683Spstopt_cleanup() 171717683Spst{ 171817683Spst free((void *)vnode_base); 171917683Spst free((void *)vmap); 172017683Spst free((void *)edges); 172117683Spst free((void *)space); 172217683Spst free((void *)levels); 172317683Spst free((void *)blocks); 172417683Spst} 172517683Spst 172617683Spst/* 172717683Spst * Return the number of stmts in 's'. 172817683Spst */ 172917683Spststatic int 173017683Spstslength(s) 173117683Spst struct slist *s; 173217683Spst{ 173317683Spst int n = 0; 173417683Spst 173517683Spst for (; s; s = s->next) 173617683Spst if (s->s.code != NOP) 173717683Spst ++n; 173817683Spst return n; 173917683Spst} 174017683Spst 174117683Spst/* 174217683Spst * Return the number of nodes reachable by 'p'. 174317683Spst * All nodes should be initially unmarked. 174417683Spst */ 174517683Spststatic int 174617683Spstcount_blocks(p) 174717683Spst struct block *p; 174817683Spst{ 174917683Spst if (p == 0 || isMarked(p)) 175017683Spst return 0; 175117683Spst Mark(p); 175217683Spst return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 175317683Spst} 175417683Spst 175517683Spst/* 175617683Spst * Do a depth first search on the flow graph, numbering the 175717683Spst * the basic blocks, and entering them into the 'blocks' array.` 175817683Spst */ 175917683Spststatic void 176017683Spstnumber_blks_r(p) 176117683Spst struct block *p; 176217683Spst{ 176317683Spst int n; 176417683Spst 176517683Spst if (p == 0 || isMarked(p)) 176617683Spst return; 176717683Spst 176817683Spst Mark(p); 176917683Spst n = n_blocks++; 177017683Spst p->id = n; 177117683Spst blocks[n] = p; 177217683Spst 177317683Spst number_blks_r(JT(p)); 177417683Spst number_blks_r(JF(p)); 177517683Spst} 177617683Spst 177717683Spst/* 177817683Spst * Return the number of stmts in the flowgraph reachable by 'p'. 177917683Spst * The nodes should be unmarked before calling. 178017683Spst */ 178117683Spststatic int 178217683Spstcount_stmts(p) 178317683Spst struct block *p; 178417683Spst{ 178517683Spst int n; 178617683Spst 178717683Spst if (p == 0 || isMarked(p)) 178817683Spst return 0; 178917683Spst Mark(p); 179017683Spst n = count_stmts(JT(p)) + count_stmts(JF(p)); 179117683Spst return slength(p->stmts) + n + 1; 179217683Spst} 179317683Spst 179417683Spst/* 179517683Spst * Allocate memory. All allocation is done before optimization 179617683Spst * is begun. A linear bound on the size of all data structures is computed 179717683Spst * from the total number of blocks and/or statements. 179817683Spst */ 179917683Spststatic void 180017683Spstopt_init(root) 180117683Spst struct block *root; 180217683Spst{ 180317683Spst bpf_u_int32 *p; 180417683Spst int i, n, max_stmts; 180517683Spst 180617683Spst /* 180717683Spst * First, count the blocks, so we can malloc an array to map 180817683Spst * block number to block. Then, put the blocks into the array. 180917683Spst */ 181017683Spst unMarkAll(); 181117683Spst n = count_blocks(root); 181217683Spst blocks = (struct block **)malloc(n * sizeof(*blocks)); 181317683Spst unMarkAll(); 181417683Spst n_blocks = 0; 181517683Spst number_blks_r(root); 181617683Spst 181717683Spst n_edges = 2 * n_blocks; 181817683Spst edges = (struct edge **)malloc(n_edges * sizeof(*edges)); 181917683Spst 182017683Spst /* 182117683Spst * The number of levels is bounded by the number of nodes. 182217683Spst */ 182317683Spst levels = (struct block **)malloc(n_blocks * sizeof(*levels)); 182417683Spst 182517683Spst edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 182617683Spst nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 182717683Spst 182817683Spst /* XXX */ 182917683Spst space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 183017683Spst + n_edges * edgewords * sizeof(*space)); 183117683Spst p = space; 183217683Spst all_dom_sets = p; 183317683Spst for (i = 0; i < n; ++i) { 183417683Spst blocks[i]->dom = p; 183517683Spst p += nodewords; 183617683Spst } 183717683Spst all_closure_sets = p; 183817683Spst for (i = 0; i < n; ++i) { 183917683Spst blocks[i]->closure = p; 184017683Spst p += nodewords; 184117683Spst } 184217683Spst all_edge_sets = p; 184317683Spst for (i = 0; i < n; ++i) { 184417683Spst register struct block *b = blocks[i]; 184517683Spst 184617683Spst b->et.edom = p; 184717683Spst p += edgewords; 184817683Spst b->ef.edom = p; 184917683Spst p += edgewords; 185017683Spst b->et.id = i; 185117683Spst edges[i] = &b->et; 185217683Spst b->ef.id = n_blocks + i; 185317683Spst edges[n_blocks + i] = &b->ef; 185417683Spst b->et.pred = b; 185517683Spst b->ef.pred = b; 185617683Spst } 185717683Spst max_stmts = 0; 185817683Spst for (i = 0; i < n; ++i) 185917683Spst max_stmts += slength(blocks[i]->stmts) + 1; 186017683Spst /* 186117683Spst * We allocate at most 3 value numbers per statement, 186217683Spst * so this is an upper bound on the number of valnodes 186317683Spst * we'll need. 186417683Spst */ 186517683Spst maxval = 3 * max_stmts; 186617683Spst vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap)); 186717683Spst vnode_base = (struct valnode *)malloc(maxval * sizeof(*vmap)); 186817683Spst} 186917683Spst 187017683Spst/* 187117683Spst * Some pointers used to convert the basic block form of the code, 187217683Spst * into the array form that BPF requires. 'fstart' will point to 187317683Spst * the malloc'd array while 'ftail' is used during the recursive traversal. 187417683Spst */ 187517683Spststatic struct bpf_insn *fstart; 187617683Spststatic struct bpf_insn *ftail; 187717683Spst 187817683Spst#ifdef BDEBUG 187917683Spstint bids[1000]; 188017683Spst#endif 188117683Spst 188217683Spst/* 188317683Spst * Returns true if successful. Returns false if a branch has 188417683Spst * an offset that is too large. If so, we have marked that 188517683Spst * branch so that on a subsequent iteration, it will be treated 188617683Spst * properly. 188717683Spst */ 188817683Spststatic int 188917683Spstconvert_code_r(p) 189017683Spst struct block *p; 189117683Spst{ 189217683Spst struct bpf_insn *dst; 189317683Spst struct slist *src; 189417683Spst int slen; 189517683Spst u_int off; 189617683Spst int extrajmps; /* number of extra jumps inserted */ 189756889Sfenner struct slist **offset = NULL; 189817683Spst 189917683Spst if (p == 0 || isMarked(p)) 190017683Spst return (1); 190117683Spst Mark(p); 190217683Spst 190317683Spst if (convert_code_r(JF(p)) == 0) 190417683Spst return (0); 190517683Spst if (convert_code_r(JT(p)) == 0) 190617683Spst return (0); 190717683Spst 190817683Spst slen = slength(p->stmts); 190917683Spst dst = ftail -= (slen + 1 + p->longjt + p->longjf); 191017683Spst /* inflate length by any extra jumps */ 191117683Spst 191217683Spst p->offset = dst - fstart; 191317683Spst 191456889Sfenner /* generate offset[] for convenience */ 191556889Sfenner if (slen) { 191656889Sfenner offset = (struct slist **)calloc(sizeof(struct slist *), slen); 191756889Sfenner if (!offset) { 191856889Sfenner bpf_error("not enough core"); 191956889Sfenner /*NOTREACHED*/ 192056889Sfenner } 192156889Sfenner } 192256889Sfenner src = p->stmts; 192356889Sfenner for (off = 0; off < slen && src; off++) { 192456889Sfenner#if 0 192556889Sfenner printf("off=%d src=%x\n", off, src); 192656889Sfenner#endif 192756889Sfenner offset[off] = src; 192856889Sfenner src = src->next; 192956889Sfenner } 193056889Sfenner 193156889Sfenner off = 0; 193217683Spst for (src = p->stmts; src; src = src->next) { 193317683Spst if (src->s.code == NOP) 193417683Spst continue; 193517683Spst dst->code = (u_short)src->s.code; 193617683Spst dst->k = src->s.k; 193756889Sfenner 193856889Sfenner /* fill block-local relative jump */ 193956889Sfenner if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == BPF_JMP|BPF_JA) { 194056889Sfenner#if 0 194156889Sfenner if (src->s.jt || src->s.jf) { 194256889Sfenner bpf_error("illegal jmp destination"); 194356889Sfenner /*NOTREACHED*/ 194456889Sfenner } 194556889Sfenner#endif 194656889Sfenner goto filled; 194756889Sfenner } 194856889Sfenner if (off == slen - 2) /*???*/ 194956889Sfenner goto filled; 195056889Sfenner 195156889Sfenner { 195256889Sfenner int i; 195356889Sfenner int jt, jf; 195456889Sfenner char *ljerr = "%s for block-local relative jump: off=%d"; 195556889Sfenner 195656889Sfenner#if 0 195756889Sfenner printf("code=%x off=%d %x %x\n", src->s.code, 195856889Sfenner off, src->s.jt, src->s.jf); 195956889Sfenner#endif 196056889Sfenner 196156889Sfenner if (!src->s.jt || !src->s.jf) { 196256889Sfenner bpf_error(ljerr, "no jmp destination", off); 196356889Sfenner /*NOTREACHED*/ 196456889Sfenner } 196556889Sfenner 196656889Sfenner jt = jf = 0; 196756889Sfenner for (i = 0; i < slen; i++) { 196856889Sfenner if (offset[i] == src->s.jt) { 196956889Sfenner if (jt) { 197056889Sfenner bpf_error(ljerr, "multiple matches", off); 197156889Sfenner /*NOTREACHED*/ 197256889Sfenner } 197356889Sfenner 197456889Sfenner dst->jt = i - off - 1; 197556889Sfenner jt++; 197656889Sfenner } 197756889Sfenner if (offset[i] == src->s.jf) { 197856889Sfenner if (jf) { 197956889Sfenner bpf_error(ljerr, "multiple matches", off); 198056889Sfenner /*NOTREACHED*/ 198156889Sfenner } 198256889Sfenner dst->jf = i - off - 1; 198356889Sfenner jf++; 198456889Sfenner } 198556889Sfenner } 198656889Sfenner if (!jt || !jf) { 198756889Sfenner bpf_error(ljerr, "no destination found", off); 198856889Sfenner /*NOTREACHED*/ 198956889Sfenner } 199056889Sfenner } 199156889Sfennerfilled: 199217683Spst ++dst; 199356889Sfenner ++off; 199417683Spst } 199556889Sfenner if (offset) 199656889Sfenner free(offset); 199756889Sfenner 199817683Spst#ifdef BDEBUG 199917683Spst bids[dst - fstart] = p->id + 1; 200017683Spst#endif 200117683Spst dst->code = (u_short)p->s.code; 200217683Spst dst->k = p->s.k; 200317683Spst if (JT(p)) { 200417683Spst extrajmps = 0; 200517683Spst off = JT(p)->offset - (p->offset + slen) - 1; 200617683Spst if (off >= 256) { 200717683Spst /* offset too large for branch, must add a jump */ 200817683Spst if (p->longjt == 0) { 200917683Spst /* mark this instruction and retry */ 201017683Spst p->longjt++; 201117683Spst return(0); 201217683Spst } 201317683Spst /* branch if T to following jump */ 201417683Spst dst->jt = extrajmps; 201517683Spst extrajmps++; 201617683Spst dst[extrajmps].code = BPF_JMP|BPF_JA; 201717683Spst dst[extrajmps].k = off - extrajmps; 201817683Spst } 201917683Spst else 202017683Spst dst->jt = off; 202117683Spst off = JF(p)->offset - (p->offset + slen) - 1; 202217683Spst if (off >= 256) { 202317683Spst /* offset too large for branch, must add a jump */ 202417683Spst if (p->longjf == 0) { 202517683Spst /* mark this instruction and retry */ 202617683Spst p->longjf++; 202717683Spst return(0); 202817683Spst } 202917683Spst /* branch if F to following jump */ 203017683Spst /* if two jumps are inserted, F goes to second one */ 203117683Spst dst->jf = extrajmps; 203217683Spst extrajmps++; 203317683Spst dst[extrajmps].code = BPF_JMP|BPF_JA; 203417683Spst dst[extrajmps].k = off - extrajmps; 203517683Spst } 203617683Spst else 203717683Spst dst->jf = off; 203817683Spst } 203917683Spst return (1); 204017683Spst} 204117683Spst 204217683Spst 204317683Spst/* 204417683Spst * Convert flowgraph intermediate representation to the 204517683Spst * BPF array representation. Set *lenp to the number of instructions. 204617683Spst */ 204717683Spststruct bpf_insn * 204817683Spsticode_to_fcode(root, lenp) 204917683Spst struct block *root; 205017683Spst int *lenp; 205117683Spst{ 205217683Spst int n; 205317683Spst struct bpf_insn *fp; 205417683Spst 205517683Spst /* 205617683Spst * Loop doing convert_codr_r() until no branches remain 205717683Spst * with too-large offsets. 205817683Spst */ 205917683Spst while (1) { 206017683Spst unMarkAll(); 206117683Spst n = *lenp = count_stmts(root); 206217683Spst 206317683Spst fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 206417683Spst memset((char *)fp, 0, sizeof(*fp) * n); 206517683Spst fstart = fp; 206617683Spst ftail = fp + n; 206717683Spst 206817683Spst unMarkAll(); 206917683Spst if (convert_code_r(root)) 207017683Spst break; 207117683Spst free(fp); 207217683Spst } 207317683Spst 207417683Spst return fp; 207517683Spst} 207617683Spst 207717683Spst#ifdef BDEBUG 207817683Spststatic void 207917683Spstopt_dump(root) 208017683Spst struct block *root; 208117683Spst{ 208217683Spst struct bpf_program f; 208317683Spst 208417683Spst memset(bids, 0, sizeof bids); 208517683Spst f.bf_insns = icode_to_fcode(root, &f.bf_len); 208617683Spst bpf_dump(&f, 1); 208717683Spst putchar('\n'); 208817683Spst free((char *)f.bf_insns); 208917683Spst} 209017683Spst#endif 2091