/* optimize.c — from FreeBSD contrib/libpcap, SVN revision 241231 */
/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
22191783Srmacklem */ 23191783Srmacklem#ifndef lint 24191783Srmacklemstatic const char rcsid[] _U_ = 25191783Srmacklem "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)"; 26191783Srmacklem#endif 27191783Srmacklem 28191783Srmacklem#ifdef HAVE_CONFIG_H 29191783Srmacklem#include "config.h" 30191783Srmacklem#endif 31191783Srmacklem 32191783Srmacklem#ifdef WIN32 33191783Srmacklem#include <pcap-stdinc.h> 34191783Srmacklem#else /* WIN32 */ 35191783Srmacklem#if HAVE_INTTYPES_H 36191783Srmacklem#include <inttypes.h> 37191783Srmacklem#elif HAVE_STDINT_H 38191783Srmacklem#include <stdint.h> 39191783Srmacklem#endif 40191783Srmacklem#ifdef HAVE_SYS_BITYPES_H 41191783Srmacklem#include <sys/bitypes.h> 42191783Srmacklem#endif 43191783Srmacklem#include <sys/types.h> 44191783Srmacklem#endif /* WIN32 */ 45191783Srmacklem 46191783Srmacklem#include <stdio.h> 47191783Srmacklem#include <stdlib.h> 48191783Srmacklem#include <memory.h> 49191783Srmacklem#include <string.h> 50191783Srmacklem 51191783Srmacklem#include <errno.h> 52191783Srmacklem 53191783Srmacklem#include "pcap-int.h" 54191783Srmacklem 55191783Srmacklem#include "gencode.h" 56191783Srmacklem 57191783Srmacklem#ifdef HAVE_OS_PROTO_H 58191783Srmacklem#include "os-proto.h" 59191783Srmacklem#endif 60191783Srmacklem 61191783Srmacklem#ifdef BDEBUG 62191783Srmacklemextern int dflag; 63191783Srmacklem#endif 64191783Srmacklem 65191783Srmacklem#if defined(MSDOS) && !defined(__DJGPP__) 66191783Srmacklemextern int _w32_ffs (int mask); 67191783Srmacklem#define ffs _w32_ffs 68191783Srmacklem#endif 69191783Srmacklem 70191783Srmacklem#if defined(WIN32) && defined (_MSC_VER) 71191783Srmacklemint ffs(int mask); 72191783Srmacklem#endif 73191783Srmacklem 74191783Srmacklem/* 75191783Srmacklem * Represents a deleted instruction. 76191783Srmacklem */ 77191783Srmacklem#define NOP -1 78191783Srmacklem 79191783Srmacklem/* 80191783Srmacklem * Register numbers for use-def values. 
81191783Srmacklem * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 82191783Srmacklem * location. A_ATOM is the accumulator and X_ATOM is the index 83191783Srmacklem * register. 84191783Srmacklem */ 85191783Srmacklem#define A_ATOM BPF_MEMWORDS 86191783Srmacklem#define X_ATOM (BPF_MEMWORDS+1) 87191783Srmacklem 88191783Srmacklem/* 89191783Srmacklem * This define is used to represent *both* the accumulator and 90191783Srmacklem * x register in use-def computations. 91191783Srmacklem * Currently, the use-def code assumes only one definition per instruction. 92191783Srmacklem */ 93191783Srmacklem#define AX_ATOM N_ATOMS 94191783Srmacklem 95191783Srmacklem/* 96191783Srmacklem * A flag to indicate that further optimization is needed. 97191783Srmacklem * Iterative passes are continued until a given pass yields no 98191783Srmacklem * branch movement. 99191783Srmacklem */ 100191783Srmacklemstatic int done; 101191783Srmacklem 102191783Srmacklem/* 103191783Srmacklem * A block is marked if only if its mark equals the current mark. 104191783Srmacklem * Rather than traverse the code array, marking each item, 'cur_mark' is 105191783Srmacklem * incremented. This automatically makes each element unmarked. 
106191783Srmacklem */ 107191783Srmacklemstatic int cur_mark; 108191783Srmacklem#define isMarked(p) ((p)->mark == cur_mark) 109191783Srmacklem#define unMarkAll() cur_mark += 1 110191783Srmacklem#define Mark(p) ((p)->mark = cur_mark) 111191783Srmacklem 112191783Srmacklemstatic void opt_init(struct block *); 113191783Srmacklemstatic void opt_cleanup(void); 114191783Srmacklem 115191783Srmacklemstatic void make_marks(struct block *); 116191783Srmacklemstatic void mark_code(struct block *); 117191783Srmacklem 118191783Srmacklemstatic void intern_blocks(struct block *); 119191783Srmacklem 120191783Srmacklemstatic int eq_slist(struct slist *, struct slist *); 121191783Srmacklem 122191783Srmacklemstatic void find_levels_r(struct block *); 123191783Srmacklem 124191783Srmacklemstatic void find_levels(struct block *); 125191783Srmacklemstatic void find_dom(struct block *); 126191783Srmacklemstatic void propedom(struct edge *); 127191783Srmacklemstatic void find_edom(struct block *); 128191783Srmacklemstatic void find_closure(struct block *); 129191783Srmacklemstatic int atomuse(struct stmt *); 130191783Srmacklemstatic int atomdef(struct stmt *); 131191783Srmacklemstatic void compute_local_ud(struct block *); 132191783Srmacklemstatic void find_ud(struct block *); 133191783Srmacklemstatic void init_val(void); 134191783Srmacklemstatic int F(int, int, int); 135191783Srmacklemstatic inline void vstore(struct stmt *, int *, int, int); 136191783Srmacklemstatic void opt_blk(struct block *, int); 137191783Srmacklemstatic int use_conflict(struct block *, struct block *); 138192145Srmacklemstatic void opt_j(struct edge *); 139192145Srmacklemstatic void or_pullup(struct block *); 140191783Srmacklemstatic void and_pullup(struct block *); 141191783Srmacklemstatic void opt_blks(struct block *, int); 142192145Srmacklemstatic inline void link_inedge(struct edge *, struct block *); 143191783Srmacklemstatic void find_inedges(struct block *); 144191783Srmacklemstatic void opt_root(struct block 
**); 145192145Srmacklemstatic void opt_loop(struct block *, int); 146192145Srmacklemstatic void fold_op(struct stmt *, int, int); 147191783Srmacklemstatic inline struct slist *this_op(struct slist *); 148191783Srmacklemstatic void opt_not(struct block *); 149191783Srmacklemstatic void opt_peep(struct block *); 150191783Srmacklemstatic void opt_stmt(struct stmt *, int[], int); 151191783Srmacklemstatic void deadstmt(struct stmt *, struct stmt *[]); 152191783Srmacklemstatic void opt_deadstores(struct block *); 153191783Srmacklemstatic struct block *fold_edge(struct block *, struct edge *); 154191783Srmacklemstatic inline int eq_blk(struct block *, struct block *); 155191783Srmacklemstatic u_int slength(struct slist *); 156191783Srmacklemstatic int count_blocks(struct block *); 157191783Srmacklemstatic void number_blks_r(struct block *); 158191783Srmacklemstatic u_int count_stmts(struct block *); 159191783Srmacklemstatic int convert_code_r(struct block *); 160191783Srmacklem#ifdef BDEBUG 161191783Srmacklemstatic void opt_dump(struct block *); 162191783Srmacklem#endif 163191783Srmacklem 164191783Srmacklemstatic int n_blocks; 165191783Srmacklemstruct block **blocks; 166191783Srmacklemstatic int n_edges; 167191783Srmacklemstruct edge **edges; 168191783Srmacklem 169191783Srmacklem/* 170191783Srmacklem * A bit vector set representation of the dominators. 171191783Srmacklem * We round up the set size to the next power of two. 172191783Srmacklem */ 173191783Srmacklemstatic int nodewords; 174191783Srmacklemstatic int edgewords; 175191783Srmacklemstruct block **levels; 176191783Srmacklembpf_u_int32 *space; 177191783Srmacklem#define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 178191783Srmacklem/* 179191783Srmacklem * True if a is in uset {p} 180191783Srmacklem */ 181191783Srmacklem#define SET_MEMBER(p, a) \ 182191783Srmacklem((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 183191783Srmacklem 184191783Srmacklem/* 185191783Srmacklem * Add 'a' to uset p. 
186191783Srmacklem */ 187191783Srmacklem#define SET_INSERT(p, a) \ 188191783Srmacklem(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 189191783Srmacklem 190191783Srmacklem/* 191191783Srmacklem * Delete 'a' from uset p. 192191783Srmacklem */ 193191783Srmacklem#define SET_DELETE(p, a) \ 194191783Srmacklem(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 195191783Srmacklem 196191783Srmacklem/* 197191783Srmacklem * a := a intersect b 198191783Srmacklem */ 199191783Srmacklem#define SET_INTERSECT(a, b, n)\ 200191783Srmacklem{\ 201191783Srmacklem register bpf_u_int32 *_x = a, *_y = b;\ 202191783Srmacklem register int _n = n;\ 203191783Srmacklem while (--_n >= 0) *_x++ &= *_y++;\ 204191783Srmacklem} 205191783Srmacklem 206191783Srmacklem/* 207191783Srmacklem * a := a - b 208191783Srmacklem */ 209191783Srmacklem#define SET_SUBTRACT(a, b, n)\ 210191783Srmacklem{\ 211191783Srmacklem register bpf_u_int32 *_x = a, *_y = b;\ 212191783Srmacklem register int _n = n;\ 213191783Srmacklem while (--_n >= 0) *_x++ &=~ *_y++;\ 214191783Srmacklem} 215191783Srmacklem 216191783Srmacklem/* 217191783Srmacklem * a := a union b 218191783Srmacklem */ 219191783Srmacklem#define SET_UNION(a, b, n)\ 220191783Srmacklem{\ 221191783Srmacklem register bpf_u_int32 *_x = a, *_y = b;\ 222191783Srmacklem register int _n = n;\ 223191783Srmacklem while (--_n >= 0) *_x++ |= *_y++;\ 224192145Srmacklem} 225191783Srmacklem 226192145Srmacklemstatic uset all_dom_sets; 227192145Srmacklemstatic uset all_closure_sets; 228192145Srmacklemstatic uset all_edge_sets; 229191783Srmacklem 230191783Srmacklem#ifndef MAX 231192145Srmacklem#define MAX(a,b) ((a)>(b)?(a):(b)) 232192145Srmacklem#endif 233191783Srmacklem 234191783Srmacklemstatic void 235192145Srmacklemfind_levels_r(b) 236192145Srmacklem struct block *b; 237191783Srmacklem{ 238191783Srmacklem int level; 239192145Srmacklem 240192145Srmacklem if (isMarked(b)) 241192145Srmacklem return; 242191783Srmacklem 
243191783Srmacklem Mark(b); 244192145Srmacklem b->link = 0; 245191783Srmacklem 246191783Srmacklem if (JT(b)) { 247191783Srmacklem find_levels_r(JT(b)); 248191783Srmacklem find_levels_r(JF(b)); 249191783Srmacklem level = MAX(JT(b)->level, JF(b)->level) + 1; 250191783Srmacklem } else 251191783Srmacklem level = 0; 252191990Sattilio b->level = level; 253191783Srmacklem b->link = levels[level]; 254191783Srmacklem levels[level] = b; 255191990Sattilio} 256191783Srmacklem 257191783Srmacklem/* 258191783Srmacklem * Level graph. The levels go from 0 at the leaves to 259191783Srmacklem * N_LEVELS at the root. The levels[] array points to the 260191783Srmacklem * first node of the level list, whose elements are linked 261191783Srmacklem * with the 'link' field of the struct block. 262191783Srmacklem */ 263191990Sattiliostatic void 264191990Sattiliofind_levels(root) 265191783Srmacklem struct block *root; 266191783Srmacklem{ 267191783Srmacklem memset((char *)levels, 0, n_blocks * sizeof(*levels)); 268191783Srmacklem unMarkAll(); 269191783Srmacklem find_levels_r(root); 270191783Srmacklem} 271191783Srmacklem 272191783Srmacklem/* 273191783Srmacklem * Find dominator relationships. 274191783Srmacklem * Assumes graph has been leveled. 275191783Srmacklem */ 276191783Srmacklemstatic void 277191783Srmacklemfind_dom(root) 278191783Srmacklem struct block *root; 279191783Srmacklem{ 280191783Srmacklem int i; 281191783Srmacklem struct block *b; 282191783Srmacklem bpf_u_int32 *x; 283191783Srmacklem 284191783Srmacklem /* 285191783Srmacklem * Initialize sets to contain all nodes. 286191783Srmacklem */ 287191783Srmacklem x = all_dom_sets; 288191783Srmacklem i = n_blocks * nodewords; 289191783Srmacklem while (--i >= 0) 290191783Srmacklem *x++ = ~0; 291191783Srmacklem /* Root starts off empty. */ 292191783Srmacklem for (i = nodewords; --i >= 0;) 293191783Srmacklem root->dom[i] = 0; 294191783Srmacklem 295191783Srmacklem /* root->level is the highest level no found. 
*/ 296191783Srmacklem for (i = root->level; i >= 0; --i) { 297191783Srmacklem for (b = levels[i]; b; b = b->link) { 298191783Srmacklem SET_INSERT(b->dom, b->id); 299191783Srmacklem if (JT(b) == 0) 300191783Srmacklem continue; 301191783Srmacklem SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 302191783Srmacklem SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 303191783Srmacklem } 304191783Srmacklem } 305191783Srmacklem} 306191783Srmacklem 307191783Srmacklemstatic void 308191783Srmacklempropedom(ep) 309191783Srmacklem struct edge *ep; 310191783Srmacklem{ 311191783Srmacklem SET_INSERT(ep->edom, ep->id); 312191783Srmacklem if (ep->succ) { 313191783Srmacklem SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 314191783Srmacklem SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 315191783Srmacklem } 316191783Srmacklem} 317191783Srmacklem 318191783Srmacklem/* 319191783Srmacklem * Compute edge dominators. 320191783Srmacklem * Assumes graph has been leveled and predecessors established. 321191783Srmacklem */ 322191783Srmacklemstatic void 323191783Srmacklemfind_edom(root) 324191783Srmacklem struct block *root; 325191783Srmacklem{ 326191783Srmacklem int i; 327191783Srmacklem uset x; 328191783Srmacklem struct block *b; 329191783Srmacklem 330191783Srmacklem x = all_edge_sets; 331191783Srmacklem for (i = n_edges * edgewords; --i >= 0; ) 332191783Srmacklem x[i] = ~0; 333191783Srmacklem 334191783Srmacklem /* root->level is the highest level no found. */ 335191783Srmacklem memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 336191783Srmacklem memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 337191783Srmacklem for (i = root->level; i >= 0; --i) { 338191783Srmacklem for (b = levels[i]; b != 0; b = b->link) { 339191783Srmacklem propedom(&b->et); 340191783Srmacklem propedom(&b->ef); 341191783Srmacklem } 342191783Srmacklem } 343191783Srmacklem} 344191783Srmacklem 345191783Srmacklem/* 346191783Srmacklem * Find the backwards transitive closure of the flow graph. 
These sets 347191783Srmacklem * are backwards in the sense that we find the set of nodes that reach 348191783Srmacklem * a given node, not the set of nodes that can be reached by a node. 349191783Srmacklem * 350191783Srmacklem * Assumes graph has been leveled. 351191783Srmacklem */ 352191783Srmacklemstatic void 353191783Srmacklemfind_closure(root) 354191783Srmacklem struct block *root; 355191783Srmacklem{ 356191783Srmacklem int i; 357191783Srmacklem struct block *b; 358191783Srmacklem 359191783Srmacklem /* 360191783Srmacklem * Initialize sets to contain no nodes. 361191783Srmacklem */ 362191783Srmacklem memset((char *)all_closure_sets, 0, 363192145Srmacklem n_blocks * nodewords * sizeof(*all_closure_sets)); 364191783Srmacklem 365191783Srmacklem /* root->level is the highest level no found. */ 366191783Srmacklem for (i = root->level; i >= 0; --i) { 367192145Srmacklem for (b = levels[i]; b; b = b->link) { 368191783Srmacklem SET_INSERT(b->closure, b->id); 369192145Srmacklem if (JT(b) == 0) 370192145Srmacklem continue; 371191783Srmacklem SET_UNION(JT(b)->closure, b->closure, nodewords); 372191783Srmacklem SET_UNION(JF(b)->closure, b->closure, nodewords); 373191783Srmacklem } 374191783Srmacklem } 375191783Srmacklem} 376191783Srmacklem 377191783Srmacklem/* 378191783Srmacklem * Return the register number that is used by s. If A and X are both 379191783Srmacklem * used, return AX_ATOM. If no register is used, return -1. 380191783Srmacklem * 381192145Srmacklem * The implementation should probably change to an array access. 382191783Srmacklem */ 383191783Srmacklemstatic int 384191783Srmacklematomuse(s) 385191783Srmacklem struct stmt *s; 386191783Srmacklem{ 387191783Srmacklem register int c = s->code; 388191783Srmacklem 389191783Srmacklem if (c == NOP) 390191783Srmacklem return -1; 391191783Srmacklem 392191783Srmacklem switch (BPF_CLASS(c)) { 393191783Srmacklem 394191783Srmacklem case BPF_RET: 395191783Srmacklem return (BPF_RVAL(c) == BPF_A) ? 
A_ATOM : 396191783Srmacklem (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 397191783Srmacklem 398191783Srmacklem case BPF_LD: 399191783Srmacklem case BPF_LDX: 400191783Srmacklem return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 401191783Srmacklem (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 402191783Srmacklem 403192145Srmacklem case BPF_ST: 404191783Srmacklem return A_ATOM; 405191783Srmacklem 406191783Srmacklem case BPF_STX: 407191783Srmacklem return X_ATOM; 408191783Srmacklem 409191783Srmacklem case BPF_JMP: 410191783Srmacklem case BPF_ALU: 411191783Srmacklem if (BPF_SRC(c) == BPF_X) 412191783Srmacklem return AX_ATOM; 413191783Srmacklem return A_ATOM; 414191783Srmacklem 415191783Srmacklem case BPF_MISC: 416191783Srmacklem return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 417191783Srmacklem } 418191783Srmacklem abort(); 419191783Srmacklem /* NOTREACHED */ 420191783Srmacklem} 421191783Srmacklem 422191783Srmacklem/* 423191783Srmacklem * Return the register number that is defined by 's'. We assume that 424192145Srmacklem * a single stmt cannot define more than one register. If no register 425191783Srmacklem * is defined, return -1. 426191783Srmacklem * 427191783Srmacklem * The implementation should probably change to an array access. 428191783Srmacklem */ 429191783Srmacklemstatic int 430191783Srmacklematomdef(s) 431192145Srmacklem struct stmt *s; 432191783Srmacklem{ 433191783Srmacklem if (s->code == NOP) 434191783Srmacklem return -1; 435191783Srmacklem 436191783Srmacklem switch (BPF_CLASS(s->code)) { 437191783Srmacklem 438191783Srmacklem case BPF_LD: 439191783Srmacklem case BPF_ALU: 440191783Srmacklem return A_ATOM; 441191783Srmacklem 442191783Srmacklem case BPF_LDX: 443191783Srmacklem return X_ATOM; 444191783Srmacklem 445191783Srmacklem case BPF_ST: 446191783Srmacklem case BPF_STX: 447191783Srmacklem return s->k; 448191783Srmacklem 449192145Srmacklem case BPF_MISC: 450191783Srmacklem return BPF_MISCOP(s->code) == BPF_TAX ? 
X_ATOM : A_ATOM; 451191783Srmacklem } 452191783Srmacklem return -1; 453191783Srmacklem} 454191783Srmacklem 455192145Srmacklem/* 456191783Srmacklem * Compute the sets of registers used, defined, and killed by 'b'. 457191783Srmacklem * 458191783Srmacklem * "Used" means that a statement in 'b' uses the register before any 459191783Srmacklem * statement in 'b' defines it, i.e. it uses the value left in 460191783Srmacklem * that register by a predecessor block of this block. 461191783Srmacklem * "Defined" means that a statement in 'b' defines it. 462191783Srmacklem * "Killed" means that a statement in 'b' defines it before any 463191783Srmacklem * statement in 'b' uses it, i.e. it kills the value left in that 464191783Srmacklem * register by a predecessor block of this block. 465191783Srmacklem */ 466191783Srmacklemstatic void 467191783Srmacklemcompute_local_ud(b) 468192145Srmacklem struct block *b; 469191783Srmacklem{ 470191783Srmacklem struct slist *s; 471191783Srmacklem atomset def = 0, use = 0, kill = 0; 472191783Srmacklem int atom; 473191783Srmacklem 474191783Srmacklem for (s = b->stmts; s; s = s->next) { 475191783Srmacklem if (s->s.code == NOP) 476191783Srmacklem continue; 477191783Srmacklem atom = atomuse(&s->s); 478191783Srmacklem if (atom >= 0) { 479192145Srmacklem if (atom == AX_ATOM) { 480192145Srmacklem if (!ATOMELEM(def, X_ATOM)) 481192145Srmacklem use |= ATOMMASK(X_ATOM); 482191783Srmacklem if (!ATOMELEM(def, A_ATOM)) 483192145Srmacklem use |= ATOMMASK(A_ATOM); 484191783Srmacklem } 485192145Srmacklem else if (atom < N_ATOMS) { 486191783Srmacklem if (!ATOMELEM(def, atom)) 487191783Srmacklem use |= ATOMMASK(atom); 488191783Srmacklem } 489191783Srmacklem else 490191783Srmacklem abort(); 491191783Srmacklem } 492191783Srmacklem atom = atomdef(&s->s); 493191783Srmacklem if (atom >= 0) { 494191783Srmacklem if (!ATOMELEM(use, atom)) 495191783Srmacklem kill |= ATOMMASK(atom); 496191783Srmacklem def |= ATOMMASK(atom); 497191783Srmacklem } 498191783Srmacklem } 
499191783Srmacklem if (BPF_CLASS(b->s.code) == BPF_JMP) { 500191783Srmacklem /* 501191783Srmacklem * XXX - what about RET? 502191783Srmacklem */ 503191783Srmacklem atom = atomuse(&b->s); 504192145Srmacklem if (atom >= 0) { 505191783Srmacklem if (atom == AX_ATOM) { 506191783Srmacklem if (!ATOMELEM(def, X_ATOM)) 507191783Srmacklem use |= ATOMMASK(X_ATOM); 508191783Srmacklem if (!ATOMELEM(def, A_ATOM)) 509191783Srmacklem use |= ATOMMASK(A_ATOM); 510191783Srmacklem } 511191783Srmacklem else if (atom < N_ATOMS) { 512191783Srmacklem if (!ATOMELEM(def, atom)) 513191783Srmacklem use |= ATOMMASK(atom); 514191783Srmacklem } 515191783Srmacklem else 516191783Srmacklem abort(); 517191783Srmacklem } 518191783Srmacklem } 519191783Srmacklem 520191783Srmacklem b->def = def; 521191783Srmacklem b->kill = kill; 522191783Srmacklem b->in_use = use; 523191783Srmacklem} 524191783Srmacklem 525191783Srmacklem/* 526191783Srmacklem * Assume graph is already leveled. 527191783Srmacklem */ 528191783Srmacklemstatic void 529191783Srmacklemfind_ud(root) 530191783Srmacklem struct block *root; 531191783Srmacklem{ 532191783Srmacklem int i, maxlevel; 533191783Srmacklem struct block *p; 534191783Srmacklem 535191783Srmacklem /* 536191783Srmacklem * root->level is the highest level no found; 537191783Srmacklem * count down from there. 
538191783Srmacklem */ 539191783Srmacklem maxlevel = root->level; 540191783Srmacklem for (i = maxlevel; i >= 0; --i) 541191783Srmacklem for (p = levels[i]; p; p = p->link) { 542191783Srmacklem compute_local_ud(p); 543191783Srmacklem p->out_use = 0; 544191783Srmacklem } 545191783Srmacklem 546191783Srmacklem for (i = 1; i <= maxlevel; ++i) { 547191783Srmacklem for (p = levels[i]; p; p = p->link) { 548191783Srmacklem p->out_use |= JT(p)->in_use | JF(p)->in_use; 549191783Srmacklem p->in_use |= p->out_use &~ p->kill; 550191783Srmacklem } 551191783Srmacklem } 552191783Srmacklem} 553191783Srmacklem 554191783Srmacklem/* 555191783Srmacklem * These data structures are used in a Cocke and Shwarz style 556191783Srmacklem * value numbering scheme. Since the flowgraph is acyclic, 557191783Srmacklem * exit values can be propagated from a node's predecessors 558191783Srmacklem * provided it is uniquely defined. 559191783Srmacklem */ 560191783Srmacklemstruct valnode { 561191783Srmacklem int code; 562191783Srmacklem int v0, v1; 563191783Srmacklem int val; 564191783Srmacklem struct valnode *next; 565191783Srmacklem}; 566191783Srmacklem 567191783Srmacklem#define MODULUS 213 568191783Srmacklemstatic struct valnode *hashtbl[MODULUS]; 569191783Srmacklemstatic int curval; 570191783Srmacklemstatic int maxval; 571191783Srmacklem 572191783Srmacklem/* Integer constants mapped with the load immediate opcode. 
*/ 573191783Srmacklem#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 574191783Srmacklem 575191783Srmacklemstruct vmapinfo { 576191783Srmacklem int is_const; 577191783Srmacklem bpf_int32 const_val; 578191783Srmacklem}; 579191783Srmacklem 580191783Srmacklemstruct vmapinfo *vmap; 581191783Srmacklemstruct valnode *vnode_base; 582191783Srmacklemstruct valnode *next_vnode; 583191783Srmacklem 584191783Srmacklemstatic void 585191783Srmackleminit_val() 586191783Srmacklem{ 587191783Srmacklem curval = 0; 588191783Srmacklem next_vnode = vnode_base; 589191783Srmacklem memset((char *)vmap, 0, maxval * sizeof(*vmap)); 590191783Srmacklem memset((char *)hashtbl, 0, sizeof hashtbl); 591191783Srmacklem} 592191783Srmacklem 593191783Srmacklem/* Because we really don't have an IR, this stuff is a little messy. */ 594191783Srmacklemstatic int 595191783SrmacklemF(code, v0, v1) 596191783Srmacklem int code; 597191783Srmacklem int v0, v1; 598191783Srmacklem{ 599191783Srmacklem u_int hash; 600191783Srmacklem int val; 601191783Srmacklem struct valnode *p; 602191783Srmacklem 603191783Srmacklem hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 604191783Srmacklem hash %= MODULUS; 605191783Srmacklem 606191783Srmacklem for (p = hashtbl[hash]; p; p = p->next) 607191783Srmacklem if (p->code == code && p->v0 == v0 && p->v1 == v1) 608191783Srmacklem return p->val; 609191783Srmacklem 610191783Srmacklem val = ++curval; 611191783Srmacklem if (BPF_MODE(code) == BPF_IMM && 612191783Srmacklem (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 613191783Srmacklem vmap[val].const_val = v0; 614191783Srmacklem vmap[val].is_const = 1; 615191783Srmacklem } 616191783Srmacklem p = next_vnode++; 617191783Srmacklem p->val = val; 618191783Srmacklem p->code = code; 619191783Srmacklem p->v0 = v0; 620191783Srmacklem p->v1 = v1; 621191783Srmacklem p->next = hashtbl[hash]; 622191783Srmacklem hashtbl[hash] = p; 623191783Srmacklem 624191783Srmacklem return val; 625191783Srmacklem} 626191783Srmacklem 627191783Srmacklemstatic 
inline void 628191783Srmacklemvstore(s, valp, newval, alter) 629191783Srmacklem struct stmt *s; 630191783Srmacklem int *valp; 631191783Srmacklem int newval; 632191783Srmacklem int alter; 633191783Srmacklem{ 634191783Srmacklem if (alter && *valp == newval) 635191783Srmacklem s->code = NOP; 636191783Srmacklem else 637191783Srmacklem *valp = newval; 638191783Srmacklem} 639191783Srmacklem 640191783Srmacklemstatic void 641191783Srmacklemfold_op(s, v0, v1) 642191783Srmacklem struct stmt *s; 643191783Srmacklem int v0, v1; 644191783Srmacklem{ 645191783Srmacklem bpf_u_int32 a, b; 646191783Srmacklem 647191783Srmacklem a = vmap[v0].const_val; 648191783Srmacklem b = vmap[v1].const_val; 649191783Srmacklem 650191783Srmacklem switch (BPF_OP(s->code)) { 651191783Srmacklem case BPF_ADD: 652191783Srmacklem a += b; 653191783Srmacklem break; 654191783Srmacklem 655191783Srmacklem case BPF_SUB: 656191783Srmacklem a -= b; 657191783Srmacklem break; 658191783Srmacklem 659191783Srmacklem case BPF_MUL: 660191783Srmacklem a *= b; 661191783Srmacklem break; 662191783Srmacklem 663191783Srmacklem case BPF_DIV: 664191783Srmacklem if (b == 0) 665191783Srmacklem bpf_error("division by zero"); 666191783Srmacklem a /= b; 667191783Srmacklem break; 668191783Srmacklem 669191783Srmacklem case BPF_AND: 670191783Srmacklem a &= b; 671191783Srmacklem break; 672191783Srmacklem 673191783Srmacklem case BPF_OR: 674191990Sattilio a |= b; 675191783Srmacklem break; 676191783Srmacklem 677191783Srmacklem case BPF_LSH: 678191783Srmacklem a <<= b; 679191783Srmacklem break; 680191783Srmacklem 681191783Srmacklem case BPF_RSH: 682191783Srmacklem a >>= b; 683191783Srmacklem break; 684191783Srmacklem 685191783Srmacklem case BPF_NEG: 686191783Srmacklem a = -a; 687191783Srmacklem break; 688191783Srmacklem 689191783Srmacklem default: 690191783Srmacklem abort(); 691191783Srmacklem } 692191783Srmacklem s->k = a; 693191783Srmacklem s->code = BPF_LD|BPF_IMM; 694191783Srmacklem done = 0; 695191783Srmacklem} 696191783Srmacklem 
697191783Srmacklemstatic inline struct slist * 698191783Srmacklemthis_op(s) 699191783Srmacklem struct slist *s; 700191783Srmacklem{ 701191783Srmacklem while (s != 0 && s->s.code == NOP) 702191783Srmacklem s = s->next; 703191783Srmacklem return s; 704191990Sattilio} 705191783Srmacklem 706191783Srmacklemstatic void 707191783Srmacklemopt_not(b) 708191783Srmacklem struct block *b; 709191783Srmacklem{ 710191783Srmacklem struct block *tmp = JT(b); 711191783Srmacklem 712191783Srmacklem JT(b) = JF(b); 713191783Srmacklem JF(b) = tmp; 714191990Sattilio} 715191783Srmacklem 716192145Srmacklemstatic void 717191783Srmacklemopt_peep(b) 718191783Srmacklem struct block *b; 719191783Srmacklem{ 720191783Srmacklem struct slist *s; 721191783Srmacklem struct slist *next, *last; 722191783Srmacklem int val; 723191783Srmacklem 724191783Srmacklem s = b->stmts; 725191783Srmacklem if (s == 0) 726191783Srmacklem return; 727191783Srmacklem 728191783Srmacklem last = s; 729191783Srmacklem for (/*empty*/; /*empty*/; s = next) { 730191783Srmacklem /* 731191783Srmacklem * Skip over nops. 732191783Srmacklem */ 733191783Srmacklem s = this_op(s); 734191783Srmacklem if (s == 0) 735191783Srmacklem break; /* nothing left in the block */ 736191783Srmacklem 737191783Srmacklem /* 738191783Srmacklem * Find the next real instruction after that one 739191783Srmacklem * (skipping nops). 
740191783Srmacklem */ 741191783Srmacklem next = this_op(s->next); 742191783Srmacklem if (next == 0) 743191783Srmacklem break; /* no next instruction */ 744191783Srmacklem last = next; 745191783Srmacklem 746191783Srmacklem /* 747191783Srmacklem * st M[k] --> st M[k] 748191783Srmacklem * ldx M[k] tax 749191783Srmacklem */ 750191783Srmacklem if (s->s.code == BPF_ST && 751191783Srmacklem next->s.code == (BPF_LDX|BPF_MEM) && 752191783Srmacklem s->s.k == next->s.k) { 753191783Srmacklem done = 0; 754191783Srmacklem next->s.code = BPF_MISC|BPF_TAX; 755191783Srmacklem } 756191783Srmacklem /* 757191783Srmacklem * ld #k --> ldx #k 758191783Srmacklem * tax txa 759191783Srmacklem */ 760191783Srmacklem if (s->s.code == (BPF_LD|BPF_IMM) && 761191783Srmacklem next->s.code == (BPF_MISC|BPF_TAX)) { 762191783Srmacklem s->s.code = BPF_LDX|BPF_IMM; 763191783Srmacklem next->s.code = BPF_MISC|BPF_TXA; 764191783Srmacklem done = 0; 765191783Srmacklem } 766191783Srmacklem /* 767191783Srmacklem * This is an ugly special case, but it happens 768191783Srmacklem * when you say tcp[k] or udp[k] where k is a constant. 769191783Srmacklem */ 770191783Srmacklem if (s->s.code == (BPF_LD|BPF_IMM)) { 771191783Srmacklem struct slist *add, *tax, *ild; 772191783Srmacklem 773191783Srmacklem /* 774191783Srmacklem * Check that X isn't used on exit from this 775191783Srmacklem * block (which the optimizer might cause). 776191783Srmacklem * We know the code generator won't generate 777191783Srmacklem * any local dependencies. 778191783Srmacklem */ 779191783Srmacklem if (ATOMELEM(b->out_use, X_ATOM)) 780191783Srmacklem continue; 781191783Srmacklem 782191783Srmacklem /* 783191783Srmacklem * Check that the instruction following the ldi 784191783Srmacklem * is an addx, or it's an ldxms with an addx 785191783Srmacklem * following it (with 0 or more nops between the 786191783Srmacklem * ldxms and addx). 
787191783Srmacklem */ 788191783Srmacklem if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 789191783Srmacklem add = next; 790191783Srmacklem else 791191783Srmacklem add = this_op(next->next); 792191783Srmacklem if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 793191783Srmacklem continue; 794191783Srmacklem 795191783Srmacklem /* 796191783Srmacklem * Check that a tax follows that (with 0 or more 797191783Srmacklem * nops between them). 798191783Srmacklem */ 799191783Srmacklem tax = this_op(add->next); 800191783Srmacklem if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 801191783Srmacklem continue; 802191783Srmacklem 803191783Srmacklem /* 804191783Srmacklem * Check that an ild follows that (with 0 or more 805191783Srmacklem * nops between them). 806191783Srmacklem */ 807191783Srmacklem ild = this_op(tax->next); 808191783Srmacklem if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 809191783Srmacklem BPF_MODE(ild->s.code) != BPF_IND) 810191783Srmacklem continue; 811191783Srmacklem /* 812191783Srmacklem * We want to turn this sequence: 813191783Srmacklem * 814191783Srmacklem * (004) ldi #0x2 {s} 815191783Srmacklem * (005) ldxms [14] {next} -- optional 816191783Srmacklem * (006) addx {add} 817191783Srmacklem * (007) tax {tax} 818191783Srmacklem * (008) ild [x+0] {ild} 819191783Srmacklem * 820191783Srmacklem * into this sequence: 821191783Srmacklem * 822191783Srmacklem * (004) nop 823191783Srmacklem * (005) ldxms [14] 824191783Srmacklem * (006) nop 825191783Srmacklem * (007) nop 826191783Srmacklem * (008) ild [x+2] 827191783Srmacklem * 828191783Srmacklem * XXX We need to check that X is not 829191783Srmacklem * subsequently used, because we want to change 830191783Srmacklem * what'll be in it after this sequence. 
831191783Srmacklem * 832191783Srmacklem * We know we can eliminate the accumulator 833191783Srmacklem * modifications earlier in the sequence since 834191783Srmacklem * it is defined by the last stmt of this sequence 835191783Srmacklem * (i.e., the last statement of the sequence loads 836191783Srmacklem * a value into the accumulator, so we can eliminate 837191783Srmacklem * earlier operations on the accumulator). 838191783Srmacklem */ 839191783Srmacklem ild->s.k += s->s.k; 840191783Srmacklem s->s.code = NOP; 841191783Srmacklem add->s.code = NOP; 842191783Srmacklem tax->s.code = NOP; 843191783Srmacklem done = 0; 844191783Srmacklem } 845191783Srmacklem } 846191783Srmacklem /* 847191783Srmacklem * If the comparison at the end of a block is an equality 848191783Srmacklem * comparison against a constant, and nobody uses the value 849191783Srmacklem * we leave in the A register at the end of a block, and 850191783Srmacklem * the operation preceding the comparison is an arithmetic 851191783Srmacklem * operation, we can sometime optimize it away. 852191990Sattilio */ 853191783Srmacklem if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 854191783Srmacklem !ATOMELEM(b->out_use, A_ATOM)) { 855191783Srmacklem /* 856191783Srmacklem * We can optimize away certain subtractions of the 857191783Srmacklem * X register. 
858191783Srmacklem */ 859191783Srmacklem if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 860191783Srmacklem val = b->val[X_ATOM]; 861191783Srmacklem if (vmap[val].is_const) { 862191783Srmacklem /* 863191783Srmacklem * If we have a subtract to do a comparison, 864191783Srmacklem * and the X register is a known constant, 865191783Srmacklem * we can merge this value into the 866191783Srmacklem * comparison: 867191783Srmacklem * 868191783Srmacklem * sub x -> nop 869191783Srmacklem * jeq #y jeq #(x+y) 870191783Srmacklem */ 871191783Srmacklem b->s.k += vmap[val].const_val; 872191783Srmacklem last->s.code = NOP; 873191783Srmacklem done = 0; 874191783Srmacklem } else if (b->s.k == 0) { 875191783Srmacklem /* 876191783Srmacklem * If the X register isn't a constant, 877191783Srmacklem * and the comparison in the test is 878191783Srmacklem * against 0, we can compare with the 879191783Srmacklem * X register, instead: 880191783Srmacklem * 881191783Srmacklem * sub x -> nop 882191783Srmacklem * jeq #0 jeq x 883191783Srmacklem */ 884191783Srmacklem last->s.code = NOP; 885191783Srmacklem b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 886191783Srmacklem done = 0; 887191783Srmacklem } 888191783Srmacklem } 889191783Srmacklem /* 890191783Srmacklem * Likewise, a constant subtract can be simplified: 891191783Srmacklem * 892191783Srmacklem * sub #x -> nop 893191783Srmacklem * jeq #y -> jeq #(x+y) 894191783Srmacklem */ 895191783Srmacklem else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 896191783Srmacklem last->s.code = NOP; 897191783Srmacklem b->s.k += last->s.k; 898191783Srmacklem done = 0; 899191783Srmacklem } 900191783Srmacklem /* 901191783Srmacklem * And, similarly, a constant AND can be simplified 902191783Srmacklem * if we're testing against 0, i.e.: 903191783Srmacklem * 904191783Srmacklem * and #k nop 905191783Srmacklem * jeq #0 -> jset #k 906191783Srmacklem */ 907191783Srmacklem else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 908191783Srmacklem b->s.k == 0) { 909191783Srmacklem b->s.k = 
last->s.k; 910191783Srmacklem b->s.code = BPF_JMP|BPF_K|BPF_JSET; 911191783Srmacklem last->s.code = NOP; 912191783Srmacklem done = 0; 913191783Srmacklem opt_not(b); 914191783Srmacklem } 915191783Srmacklem } 916191783Srmacklem /* 917191783Srmacklem * jset #0 -> never 918191783Srmacklem * jset #ffffffff -> always 919191783Srmacklem */ 920191783Srmacklem if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 921191783Srmacklem if (b->s.k == 0) 922191783Srmacklem JT(b) = JF(b); 923191783Srmacklem if (b->s.k == 0xffffffff) 924191783Srmacklem JF(b) = JT(b); 925191783Srmacklem } 926191783Srmacklem /* 927191783Srmacklem * If we're comparing against the index register, and the index 928191783Srmacklem * register is a known constant, we can just compare against that 929191783Srmacklem * constant. 930191783Srmacklem */ 931191783Srmacklem val = b->val[X_ATOM]; 932191783Srmacklem if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 933191783Srmacklem bpf_int32 v = vmap[val].const_val; 934191783Srmacklem b->s.code &= ~BPF_X; 935191783Srmacklem b->s.k = v; 936191783Srmacklem } 937191783Srmacklem /* 938191783Srmacklem * If the accumulator is a known constant, we can compute the 939191783Srmacklem * comparison result. 
940191783Srmacklem */ 941191783Srmacklem val = b->val[A_ATOM]; 942191783Srmacklem if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 943191783Srmacklem bpf_int32 v = vmap[val].const_val; 944191783Srmacklem switch (BPF_OP(b->s.code)) { 945191783Srmacklem 946191783Srmacklem case BPF_JEQ: 947191783Srmacklem v = v == b->s.k; 948191783Srmacklem break; 949191783Srmacklem 950191783Srmacklem case BPF_JGT: 951191783Srmacklem v = (unsigned)v > b->s.k; 952191783Srmacklem break; 953191783Srmacklem 954191783Srmacklem case BPF_JGE: 955191783Srmacklem v = (unsigned)v >= b->s.k; 956191783Srmacklem break; 957191783Srmacklem 958191783Srmacklem case BPF_JSET: 959191783Srmacklem v &= b->s.k; 960191783Srmacklem break; 961191783Srmacklem 962191783Srmacklem default: 963191783Srmacklem abort(); 964191783Srmacklem } 965191783Srmacklem if (JF(b) != JT(b)) 966191783Srmacklem done = 0; 967191783Srmacklem if (v) 968191783Srmacklem JF(b) = JT(b); 969191783Srmacklem else 970191783Srmacklem JT(b) = JF(b); 971191783Srmacklem } 972191783Srmacklem} 973191783Srmacklem 974191783Srmacklem/* 975191783Srmacklem * Compute the symbolic value of expression of 's', and update 976191783Srmacklem * anything it defines in the value table 'val'. If 'alter' is true, 977191783Srmacklem * do various optimizations. This code would be cleaner if symbolic 978191783Srmacklem * evaluation and code transformations weren't folded together. 
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		/* Absolute packet load: value depends only on opcode and offset. */
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			/*
			 * X is a known constant, so fold it into the
			 * offset and convert the indirect load into an
			 * absolute load.
			 */
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			/* A is a known constant: fold the negation. */
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					/* Identity operation: drop it. */
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					/* A * 0 and A & 0 are 0: load 0. */
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				/* Both operands constant: fold at compile time. */
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				/* A and X both constant: fold completely. */
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				/*
				 * X is constant but A isn't: rewrite the
				 * X-form ALU op as the equivalent K-form op.
				 */
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				/* 0 + X and 0 | X are just X. */
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				/* Result is always 0. */
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				/* -0 is 0: the op is a no-op. */
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			/* Scratch slot holds a constant: load it directly. */
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			/* Scratch slot holds a constant: load it directly. */
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

/*
 * Update the dead-store tracking state 'last' for one statement 's'.
 *
 * 'last[atom]' records the most recent statement that defined each
 * atom (register or scratch-memory slot) without an intervening use.
 * A use of an atom clears its slot; a redefinition while the slot is
 * still occupied means the recorded statement's value was never read,
 * so that statement is turned into a NOP.
 */
static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			/* AX_ATOM means the statement uses both A and X. */
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			/* Previous definition was never used: kill it. */
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

/*
 * Eliminate stores within block 'b' whose values are never read,
 * either within the block or (per b->out_use) by any successor.
 */
static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	/*
	 * Any definition still pending at the end of the block is dead
	 * unless some successor block uses that atom.
	 */
	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

/*
 * Perform symbolic evaluation and local optimization on block 'b'.
 * If 'do_stmts' is true, opt_stmt() is allowed to rewrite statements
 * and whole blocks may be emptied when provably redundant.
 */
static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	/* Remember entry values of A and X to detect no-change blocks below. */
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

/*
 * Given a dominating edge 'ep' and a successor block 'child' that ends
 * in the same comparison, decide which branch of 'child' must be taken
 * and return that target; return 0 if the outcome cannot be determined.
 * ep->code is negated for false-branch edges, which is what 'sense'
 * decodes.
 */
static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		/* Negative code marks the false-branch edge. */
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}

/*
 * Attempt to retarget edge 'ep' past branches whose outcome is already
 * determined along this path, shortening chains of redundant tests.
 */
static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		/* Scan the bit vector of dominating edges word by word. */
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


/*
 * Pull up a common successor through a chain of false branches
 * ("or" chains), so equivalent tests are shared.
 */
static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/* Walk down the false-branch chain until the A value differs. */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/* Continue until a block with the same A value is found again. */
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * Mirror image of or_pullup(): pull up a common successor through a
 * chain of true branches ("and" chains).
 */
static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/* Walk down the true-branch chain until the A value differs. */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	/* Continue until a block with the same A value is found again. */
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

/*
 * Run one round of block-level optimization over the whole graph:
 * symbolic evaluation top-down, then branch and pull-up optimization
 * bottom-up (skipped when do_stmts is set).
 */
static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;

	find_inedges(root);
	/* Evaluate from the deepest level toward the root. */
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}

	/* opt_j() may have moved edges; recompute predecessor lists. */
	find_inedges(root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

/*
 * Prepend edge 'parent' to the in-edge list of block 'child'.
 */
static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

/*
 * Rebuild the in-edge (predecessor) lists for every block in the graph.
 */
static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

/*
 * Simplify the root of the graph: skip degenerate jumps whose branches
 * agree, carrying the original root's statements along, and drop all
 * statements if the root is a return.
 */
static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

/*
 * Repeat the full analysis/optimization cycle until a pass makes no
 * change (the global 'done' flag stays set).
 */
static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(root);
	}
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
			opt_dump(root);
		}
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
/*
 * Public entry point: optimize the filter whose root block is *rootp.
 * Runs two optimization passes (first without, then with statement
 * rewriting), merges equivalent blocks, simplifies the root, and
 * releases all optimizer working storage.  *rootp may be updated to
 * point at a new root block.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(root);
	}
#endif
	opt_cleanup();
}

/*
 * Recursively mark 'p' and every block reachable from it; recursion
 * stops at return blocks, which have no successors.
 */
static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	/* Bumping cur_mark invalidates all previous marks at once. */
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	/* Compare the two lists element-wise, skipping NOPs in both. */
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

/*
 * True iff blocks 'b0' and 'b1' are equivalent: same terminating
 * comparison, same branch targets, and equal statement lists.
 */
static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

/*
 * Merge equivalent blocks: for each pair of live, equal blocks, link
 * the earlier one to the later and redirect all branches to the
 * representative.  Repeats until no edge changes, since merging can
 * expose new equivalences.
 */
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	/*
	 * Working backwards, point each block at an equivalent
	 * higher-numbered block (or at that block's representative,
	 * if it already has one).
	 */
	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	/* Redirect every branch to its target's representative. */
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

/*
 * Free all working storage allocated by opt_init().
 */
static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
1906 */ 1907static u_int 1908slength(s) 1909 struct slist *s; 1910{ 1911 u_int n = 0; 1912 1913 for (; s; s = s->next) 1914 if (s->s.code != NOP) 1915 ++n; 1916 return n; 1917} 1918 1919/* 1920 * Return the number of nodes reachable by 'p'. 1921 * All nodes should be initially unmarked. 1922 */ 1923static int 1924count_blocks(p) 1925 struct block *p; 1926{ 1927 if (p == 0 || isMarked(p)) 1928 return 0; 1929 Mark(p); 1930 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 1931} 1932 1933/* 1934 * Do a depth first search on the flow graph, numbering the 1935 * the basic blocks, and entering them into the 'blocks' array.` 1936 */ 1937static void 1938number_blks_r(p) 1939 struct block *p; 1940{ 1941 int n; 1942 1943 if (p == 0 || isMarked(p)) 1944 return; 1945 1946 Mark(p); 1947 n = n_blocks++; 1948 p->id = n; 1949 blocks[n] = p; 1950 1951 number_blks_r(JT(p)); 1952 number_blks_r(JF(p)); 1953} 1954 1955/* 1956 * Return the number of stmts in the flowgraph reachable by 'p'. 1957 * The nodes should be unmarked before calling. 1958 * 1959 * Note that "stmts" means "instructions", and that this includes 1960 * 1961 * side-effect statements in 'p' (slength(p->stmts)); 1962 * 1963 * statements in the true branch from 'p' (count_stmts(JT(p))); 1964 * 1965 * statements in the false branch from 'p' (count_stmts(JF(p))); 1966 * 1967 * the conditional jump itself (1); 1968 * 1969 * an extra long jump if the true branch requires it (p->longjt); 1970 * 1971 * an extra long jump if the false branch requires it (p->longjf). 1972 */ 1973static u_int 1974count_stmts(p) 1975 struct block *p; 1976{ 1977 u_int n; 1978 1979 if (p == 0 || isMarked(p)) 1980 return 0; 1981 Mark(p); 1982 n = count_stmts(JT(p)) + count_stmts(JF(p)); 1983 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 1984} 1985 1986/* 1987 * Allocate memory. All allocation is done before optimization 1988 * is begun. 
A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);
	/* After number_blks_r(), n_blocks == n (same reachable set). */

	/* Every block has exactly two out-edges (true and false). */
	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

	/* Bit-set widths, in 32-bit words, for edge sets and node sets. */
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/*
	 * One arena for all bit sets: a dom set and a closure set per
	 * block (nodewords each), plus an edom set per edge (edgewords
	 * each; two edges per block).
	 */
	/* XXX */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		/* True edges get ids [0, n_blocks), false edges follow. */
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code,
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
/*
 * Maps instruction index -> (block id + 1) for opt_dump().
 * NOTE(review): fixed size 1000 — presumably assumed larger than any
 * generated program; not bounds-checked here, confirm elsewhere.
 */
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 *
 * Emits code bottom-up: successors are converted first, and 'ftail'
 * is walked backwards from the end of the instruction array, so each
 * block's code lands in front of its successors' code.
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	/* Convert both successors first (reverse-order emission). */
	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	/*
	 * Reserve this block's slice of the array: its statements, its
	 * branch, and any extra long jumps flagged on earlier passes.
	 */
	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience  */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	/* offset[i] is the i'th statement node (NOPs included here). */
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			/* Not a conditional jump: nothing to resolve. */
			goto filled;
		}
		/*
		 * NOTE(review): skips resolution for the penultimate
		 * statement — rationale unclear (original's "???").
		 */
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Find each branch target in offset[] and store the
		 * jump displacement relative to the next instruction;
		 * exactly one match per target is required.
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	/* Emit the block's terminating branch instruction. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	u_int *lenp;
{
	u_int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.  Each retry recounts, since a
	 * flagged branch adds extra jump instructions.
	 */
	while (1) {
		unMarkAll();
		n = *lenp = count_stmts(root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error("malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		fstart = fp;
		ftail = fp + n;

		unMarkAll();
		if (convert_code_r(root))
			break;
		free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			 "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}

#ifdef BDEBUG
/*
 * Debug helper: flatten the flowgraph and print the resulting
 * BPF program via bpf_dump().
 */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif