optimize.c (214518) optimize.c (241231)
1/*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23#ifndef lint
24static const char rcsid[] _U_ =
25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)";
26#endif
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#ifdef WIN32
33#include <pcap-stdinc.h>
34#else /* WIN32 */
35#if HAVE_INTTYPES_H
36#include <inttypes.h>
37#elif HAVE_STDINT_H
38#include <stdint.h>
39#endif
40#ifdef HAVE_SYS_BITYPES_H
41#include <sys/bitypes.h>
42#endif
43#include <sys/types.h>
44#endif /* WIN32 */
45
46#include <stdio.h>
47#include <stdlib.h>
48#include <memory.h>
49#include <string.h>
50
51#include <errno.h>
52
53#include "pcap-int.h"
54
55#include "gencode.h"
56
57#ifdef HAVE_OS_PROTO_H
58#include "os-proto.h"
59#endif
60
61#ifdef BDEBUG
62extern int dflag;
63#endif
64
65#if defined(MSDOS) && !defined(__DJGPP__)
66extern int _w32_ffs (int mask);
67#define ffs _w32_ffs
68#endif
69
70#if defined(WIN32) && defined (_MSC_VER)
71int ffs(int mask);
72#endif
73
74/*
75 * Represents a deleted instruction.
76 */
77#define NOP -1
78
79/*
80 * Register numbers for use-def values.
81 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
82 * location. A_ATOM is the accumulator and X_ATOM is the index
83 * register.
84 */
85#define A_ATOM BPF_MEMWORDS
86#define X_ATOM (BPF_MEMWORDS+1)
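/*
 * Illustrative sketch: with the usual BPF_MEMWORDS of 16, the atoms are
 * numbered 0..15 for the scratch cells M[0]..M[15], 16 for the
 * accumulator (A_ATOM) and 17 for the index register (X_ATOM).
 */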
87
88/*
89 * This define is used to represent *both* the accumulator and
90 * x register in use-def computations.
91 * Currently, the use-def code assumes only one definition per instruction.
92 */
93#define AX_ATOM N_ATOMS
94
95/*
96 * A flag to indicate that further optimization is needed.
97 * Iterative passes are continued until a given pass yields no
98 * branch movement.
99 */
100static int done;
101
102/*
 103 * A block is marked if and only if its mark equals the current mark.
104 * Rather than traverse the code array, marking each item, 'cur_mark' is
105 * incremented. This automatically makes each element unmarked.
106 */
107static int cur_mark;
108#define isMarked(p) ((p)->mark == cur_mark)
109#define unMarkAll() cur_mark += 1
110#define Mark(p) ((p)->mark = cur_mark)
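/*
 * Minimal usage sketch of the marking scheme (visit() is a hypothetical
 * traversal routine, not part of this file):
 *
 *	unMarkAll();		(bump cur_mark; every block is now unmarked)
 *	if (!isMarked(b)) {
 *		Mark(b);
 *		visit(b);
 *	}
 */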
111
112static void opt_init(struct block *);
113static void opt_cleanup(void);
114
115static void make_marks(struct block *);
116static void mark_code(struct block *);
117
118static void intern_blocks(struct block *);
119
120static int eq_slist(struct slist *, struct slist *);
121
122static void find_levels_r(struct block *);
123
124static void find_levels(struct block *);
125static void find_dom(struct block *);
126static void propedom(struct edge *);
127static void find_edom(struct block *);
128static void find_closure(struct block *);
129static int atomuse(struct stmt *);
130static int atomdef(struct stmt *);
131static void compute_local_ud(struct block *);
132static void find_ud(struct block *);
133static void init_val(void);
134static int F(int, int, int);
135static inline void vstore(struct stmt *, int *, int, int);
136static void opt_blk(struct block *, int);
137static int use_conflict(struct block *, struct block *);
138static void opt_j(struct edge *);
139static void or_pullup(struct block *);
140static void and_pullup(struct block *);
141static void opt_blks(struct block *, int);
142static inline void link_inedge(struct edge *, struct block *);
143static void find_inedges(struct block *);
144static void opt_root(struct block **);
145static void opt_loop(struct block *, int);
146static void fold_op(struct stmt *, int, int);
147static inline struct slist *this_op(struct slist *);
148static void opt_not(struct block *);
149static void opt_peep(struct block *);
150static void opt_stmt(struct stmt *, int[], int);
151static void deadstmt(struct stmt *, struct stmt *[]);
152static void opt_deadstores(struct block *);
153static struct block *fold_edge(struct block *, struct edge *);
154static inline int eq_blk(struct block *, struct block *);
155static u_int slength(struct slist *);
156static int count_blocks(struct block *);
157static void number_blks_r(struct block *);
158static u_int count_stmts(struct block *);
159static int convert_code_r(struct block *);
160#ifdef BDEBUG
161static void opt_dump(struct block *);
162#endif
163
164static int n_blocks;
165struct block **blocks;
166static int n_edges;
167struct edge **edges;
168
169/*
170 * A bit vector set representation of the dominators.
171 * We round up the set size to the next power of two.
172 */
173static int nodewords;
174static int edgewords;
175struct block **levels;
176bpf_u_int32 *space;
177#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
178/*
179 * True if a is in uset {p}
180 */
181#define SET_MEMBER(p, a) \
182((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
183
184/*
185 * Add 'a' to uset p.
186 */
187#define SET_INSERT(p, a) \
188(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
189
190/*
191 * Delete 'a' from uset p.
192 */
193#define SET_DELETE(p, a) \
194(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
195
196/*
197 * a := a intersect b
198 */
199#define SET_INTERSECT(a, b, n)\
200{\
201 register bpf_u_int32 *_x = a, *_y = b;\
202 register int _n = n;\
203 while (--_n >= 0) *_x++ &= *_y++;\
204}
205
206/*
207 * a := a - b
208 */
209#define SET_SUBTRACT(a, b, n)\
210{\
211 register bpf_u_int32 *_x = a, *_y = b;\
212 register int _n = n;\
213 while (--_n >= 0) *_x++ &=~ *_y++;\
214}
215
216/*
217 * a := a union b
218 */
219#define SET_UNION(a, b, n)\
220{\
221 register bpf_u_int32 *_x = a, *_y = b;\
222 register int _n = n;\
223 while (--_n >= 0) *_x++ |= *_y++;\
224}
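/*
 * Usage sketch (illustrative): each uset is an array of bpf_u_int32
 * words, 'nodewords' long for node sets and 'edgewords' long for edge
 * sets.  "b is dominated by p" is tested as SET_MEMBER(b->dom, p->id),
 * and find_dom() below narrows a successor's set with
 * SET_INTERSECT(JT(b)->dom, b->dom, nodewords).
 */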
225
226static uset all_dom_sets;
227static uset all_closure_sets;
228static uset all_edge_sets;
229
230#ifndef MAX
231#define MAX(a,b) ((a)>(b)?(a):(b))
232#endif
233
234static void
235find_levels_r(b)
236 struct block *b;
237{
238 int level;
239
240 if (isMarked(b))
241 return;
242
243 Mark(b);
244 b->link = 0;
245
246 if (JT(b)) {
247 find_levels_r(JT(b));
248 find_levels_r(JF(b));
249 level = MAX(JT(b)->level, JF(b)->level) + 1;
250 } else
251 level = 0;
252 b->level = level;
253 b->link = levels[level];
254 levels[level] = b;
255}
256
257/*
258 * Level graph. The levels go from 0 at the leaves to
259 * N_LEVELS at the root. The levels[] array points to the
260 * first node of the level list, whose elements are linked
261 * with the 'link' field of the struct block.
262 */
263static void
264find_levels(root)
265 struct block *root;
266{
267 memset((char *)levels, 0, n_blocks * sizeof(*levels));
268 unMarkAll();
269 find_levels_r(root);
270}
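/*
 * The passes below all walk the leveled graph the same way (this is the
 * loop shape used by find_dom(), find_edom() and find_closure()):
 *
 *	for (i = root->level; i >= 0; --i)
 *		for (b = levels[i]; b != 0; b = b->link)
 *			...
 */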
271
272/*
273 * Find dominator relationships.
274 * Assumes graph has been leveled.
275 */
276static void
277find_dom(root)
278 struct block *root;
279{
280 int i;
281 struct block *b;
282 bpf_u_int32 *x;
283
284 /*
285 * Initialize sets to contain all nodes.
286 */
287 x = all_dom_sets;
288 i = n_blocks * nodewords;
289 while (--i >= 0)
290 *x++ = ~0;
291 /* Root starts off empty. */
292 for (i = nodewords; --i >= 0;)
293 root->dom[i] = 0;
294
 295	/* root->level is the highest level number found. */
296 for (i = root->level; i >= 0; --i) {
297 for (b = levels[i]; b; b = b->link) {
298 SET_INSERT(b->dom, b->id);
299 if (JT(b) == 0)
300 continue;
301 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
302 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
303 }
304 }
305}
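/*
 * Worked example (illustrative): for a diamond-shaped graph whose root r
 * branches to a and b, both of which fall into c, the loop above gives
 * dom(a) = {r,a} and dom(b) = {r,b}; c's set is narrowed by both
 * intersections and ends up as dom(c) = {r,c}.
 */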
306
307static void
308propedom(ep)
309 struct edge *ep;
310{
311 SET_INSERT(ep->edom, ep->id);
312 if (ep->succ) {
313 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
314 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
315 }
316}
317
318/*
319 * Compute edge dominators.
320 * Assumes graph has been leveled and predecessors established.
321 */
322static void
323find_edom(root)
324 struct block *root;
325{
326 int i;
327 uset x;
328 struct block *b;
329
330 x = all_edge_sets;
331 for (i = n_edges * edgewords; --i >= 0; )
332 x[i] = ~0;
333
 334	/* root->level is the highest level number found. */
335 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
336 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
337 for (i = root->level; i >= 0; --i) {
338 for (b = levels[i]; b != 0; b = b->link) {
339 propedom(&b->et);
340 propedom(&b->ef);
341 }
342 }
343}
344
345/*
346 * Find the backwards transitive closure of the flow graph. These sets
347 * are backwards in the sense that we find the set of nodes that reach
348 * a given node, not the set of nodes that can be reached by a node.
349 *
350 * Assumes graph has been leveled.
351 */
352static void
353find_closure(root)
354 struct block *root;
355{
356 int i;
357 struct block *b;
358
359 /*
360 * Initialize sets to contain no nodes.
361 */
362 memset((char *)all_closure_sets, 0,
363 n_blocks * nodewords * sizeof(*all_closure_sets));
364
 365	/* root->level is the highest level number found. */
366 for (i = root->level; i >= 0; --i) {
367 for (b = levels[i]; b; b = b->link) {
368 SET_INSERT(b->closure, b->id);
369 if (JT(b) == 0)
370 continue;
371 SET_UNION(JT(b)->closure, b->closure, nodewords);
372 SET_UNION(JF(b)->closure, b->closure, nodewords);
373 }
374 }
375}
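/*
 * For the diamond example above (illustrative), closure(c) comes out as
 * {r, a, b, c}: every node that can reach c, plus c itself.
 */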
376
377/*
378 * Return the register number that is used by s. If A and X are both
379 * used, return AX_ATOM. If no register is used, return -1.
380 *
381 * The implementation should probably change to an array access.
382 */
383static int
384atomuse(s)
385 struct stmt *s;
386{
387 register int c = s->code;
388
389 if (c == NOP)
390 return -1;
391
392 switch (BPF_CLASS(c)) {
393
394 case BPF_RET:
395 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
396 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
397
398 case BPF_LD:
399 case BPF_LDX:
400 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
401 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
402
403 case BPF_ST:
404 return A_ATOM;
405
406 case BPF_STX:
407 return X_ATOM;
408
409 case BPF_JMP:
410 case BPF_ALU:
411 if (BPF_SRC(c) == BPF_X)
412 return AX_ATOM;
413 return A_ATOM;
414
415 case BPF_MISC:
416 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
417 }
418 abort();
419 /* NOTREACHED */
420}
421
422/*
423 * Return the register number that is defined by 's'. We assume that
424 * a single stmt cannot define more than one register. If no register
425 * is defined, return -1.
426 *
427 * The implementation should probably change to an array access.
428 */
429static int
430atomdef(s)
431 struct stmt *s;
432{
433 if (s->code == NOP)
434 return -1;
435
436 switch (BPF_CLASS(s->code)) {
437
438 case BPF_LD:
439 case BPF_ALU:
440 return A_ATOM;
441
442 case BPF_LDX:
443 return X_ATOM;
444
445 case BPF_ST:
446 case BPF_STX:
447 return s->k;
448
449 case BPF_MISC:
450 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
451 }
452 return -1;
453}
454
455/*
456 * Compute the sets of registers used, defined, and killed by 'b'.
457 *
458 * "Used" means that a statement in 'b' uses the register before any
459 * statement in 'b' defines it, i.e. it uses the value left in
460 * that register by a predecessor block of this block.
461 * "Defined" means that a statement in 'b' defines it.
462 * "Killed" means that a statement in 'b' defines it before any
463 * statement in 'b' uses it, i.e. it kills the value left in that
464 * register by a predecessor block of this block.
465 */
466static void
467compute_local_ud(b)
468 struct block *b;
469{
470 struct slist *s;
471 atomset def = 0, use = 0, kill = 0;
472 int atom;
473
474 for (s = b->stmts; s; s = s->next) {
475 if (s->s.code == NOP)
476 continue;
477 atom = atomuse(&s->s);
478 if (atom >= 0) {
479 if (atom == AX_ATOM) {
480 if (!ATOMELEM(def, X_ATOM))
481 use |= ATOMMASK(X_ATOM);
482 if (!ATOMELEM(def, A_ATOM))
483 use |= ATOMMASK(A_ATOM);
484 }
485 else if (atom < N_ATOMS) {
486 if (!ATOMELEM(def, atom))
487 use |= ATOMMASK(atom);
488 }
489 else
490 abort();
491 }
492 atom = atomdef(&s->s);
493 if (atom >= 0) {
494 if (!ATOMELEM(use, atom))
495 kill |= ATOMMASK(atom);
496 def |= ATOMMASK(atom);
497 }
498 }
499 if (BPF_CLASS(b->s.code) == BPF_JMP) {
500 /*
501 * XXX - what about RET?
502 */
503 atom = atomuse(&b->s);
504 if (atom >= 0) {
505 if (atom == AX_ATOM) {
506 if (!ATOMELEM(def, X_ATOM))
507 use |= ATOMMASK(X_ATOM);
508 if (!ATOMELEM(def, A_ATOM))
509 use |= ATOMMASK(A_ATOM);
510 }
511 else if (atom < N_ATOMS) {
512 if (!ATOMELEM(def, atom))
513 use |= ATOMMASK(atom);
514 }
515 else
516 abort();
517 }
518 }
519
520 b->def = def;
521 b->kill = kill;
522 b->in_use = use;
523}
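/*
 * Worked example (illustrative): for a block whose statements are
 *
 *	ld M[2]		(M[2] used before any definition)
 *	st M[3]		(A already defined by the ld; M[3] defined first)
 *
 * this gives use = {M[2]} and def = kill = {A_ATOM, M[3]}.
 */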
524
525/*
526 * Assume graph is already leveled.
527 */
528static void
529find_ud(root)
530 struct block *root;
531{
532 int i, maxlevel;
533 struct block *p;
534
535 /*
 536	 * root->level is the highest level number found;
537 * count down from there.
538 */
539 maxlevel = root->level;
540 for (i = maxlevel; i >= 0; --i)
541 for (p = levels[i]; p; p = p->link) {
542 compute_local_ud(p);
543 p->out_use = 0;
544 }
545
546 for (i = 1; i <= maxlevel; ++i) {
547 for (p = levels[i]; p; p = p->link) {
548 p->out_use |= JT(p)->in_use | JF(p)->in_use;
549 p->in_use |= p->out_use &~ p->kill;
550 }
551 }
552}
553
554/*
 555 * These data structures are used in a Cocke and Schwartz style
556 * value numbering scheme. Since the flowgraph is acyclic,
557 * exit values can be propagated from a node's predecessors
558 * provided it is uniquely defined.
559 */
560struct valnode {
561 int code;
562 int v0, v1;
563 int val;
564 struct valnode *next;
565};
566
567#define MODULUS 213
568static struct valnode *hashtbl[MODULUS];
569static int curval;
570static int maxval;
571
572/* Integer constants mapped with the load immediate opcode. */
573#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
574
575struct vmapinfo {
576 int is_const;
577 bpf_int32 const_val;
578};
579
580struct vmapinfo *vmap;
581struct valnode *vnode_base;
582struct valnode *next_vnode;
583
584static void
585init_val()
586{
587 curval = 0;
588 next_vnode = vnode_base;
589 memset((char *)vmap, 0, maxval * sizeof(*vmap));
590 memset((char *)hashtbl, 0, sizeof hashtbl);
591}
592
593/* Because we really don't have an IR, this stuff is a little messy. */
594static int
595F(code, v0, v1)
596 int code;
597 int v0, v1;
598{
599 u_int hash;
600 int val;
601 struct valnode *p;
602
603 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
604 hash %= MODULUS;
605
606 for (p = hashtbl[hash]; p; p = p->next)
607 if (p->code == code && p->v0 == v0 && p->v1 == v1)
608 return p->val;
609
610 val = ++curval;
611 if (BPF_MODE(code) == BPF_IMM &&
612 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
613 vmap[val].const_val = v0;
614 vmap[val].is_const = 1;
615 }
616 p = next_vnode++;
617 p->val = val;
618 p->code = code;
619 p->v0 = v0;
620 p->v1 = v1;
621 p->next = hashtbl[hash];
622 hashtbl[hash] = p;
623
624 return val;
625}
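/*
 * Illustrative: identical computations get identical value numbers, so
 * two calls like
 *
 *	F(BPF_LD|BPF_ABS|BPF_B, 14, 0)
 *	F(BPF_LD|BPF_ABS|BPF_B, 14, 0)
 *
 * both return the same val, and the second load can later be recognized
 * as redundant.  Distinct constants always receive distinct value
 * numbers, which fold_edge() relies on below.
 */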
626
627static inline void
628vstore(s, valp, newval, alter)
629 struct stmt *s;
630 int *valp;
631 int newval;
632 int alter;
633{
634 if (alter && *valp == newval)
635 s->code = NOP;
636 else
637 *valp = newval;
638}
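/*
 * Illustrative: in opt_stmt() below, a "ld M[k]" whose value number is
 * already in the accumulator reaches here with *valp == newval, so the
 * redundant load is turned into a NOP rather than re-recorded.
 */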
639
640static void
641fold_op(s, v0, v1)
642 struct stmt *s;
643 int v0, v1;
644{
645 bpf_u_int32 a, b;
646
647 a = vmap[v0].const_val;
648 b = vmap[v1].const_val;
649
650 switch (BPF_OP(s->code)) {
651 case BPF_ADD:
652 a += b;
653 break;
654
655 case BPF_SUB:
656 a -= b;
657 break;
658
659 case BPF_MUL:
660 a *= b;
661 break;
662
663 case BPF_DIV:
664 if (b == 0)
665 bpf_error("division by zero");
666 a /= b;
667 break;
668
669 case BPF_AND:
670 a &= b;
671 break;
672
673 case BPF_OR:
674 a |= b;
675 break;
676
677 case BPF_LSH:
678 a <<= b;
679 break;
680
681 case BPF_RSH:
682 a >>= b;
683 break;
684
685 case BPF_NEG:
686 a = -a;
687 break;
688
689 default:
690 abort();
691 }
692 s->k = a;
693 s->code = BPF_LD|BPF_IMM;
694 done = 0;
695}
696
697static inline struct slist *
698this_op(s)
699 struct slist *s;
700{
701 while (s != 0 && s->s.code == NOP)
702 s = s->next;
703 return s;
704}
705
706static void
707opt_not(b)
708 struct block *b;
709{
710 struct block *tmp = JT(b);
711
712 JT(b) = JF(b);
713 JF(b) = tmp;
714}
715
716static void
717opt_peep(b)
718 struct block *b;
719{
720 struct slist *s;
721 struct slist *next, *last;
722 int val;
723
724 s = b->stmts;
725 if (s == 0)
726 return;
727
728 last = s;
729 for (/*empty*/; /*empty*/; s = next) {
730 /*
731 * Skip over nops.
732 */
733 s = this_op(s);
734 if (s == 0)
735 break; /* nothing left in the block */
736
737 /*
738 * Find the next real instruction after that one
739 * (skipping nops).
740 */
741 next = this_op(s->next);
742 if (next == 0)
743 break; /* no next instruction */
744 last = next;
745
746 /*
747 * st M[k] --> st M[k]
748 * ldx M[k] tax
749 */
750 if (s->s.code == BPF_ST &&
751 next->s.code == (BPF_LDX|BPF_MEM) &&
752 s->s.k == next->s.k) {
753 done = 0;
754 next->s.code = BPF_MISC|BPF_TAX;
755 }
756 /*
757 * ld #k --> ldx #k
758 * tax txa
759 */
760 if (s->s.code == (BPF_LD|BPF_IMM) &&
761 next->s.code == (BPF_MISC|BPF_TAX)) {
762 s->s.code = BPF_LDX|BPF_IMM;
763 next->s.code = BPF_MISC|BPF_TXA;
764 done = 0;
765 }
766 /*
767 * This is an ugly special case, but it happens
768 * when you say tcp[k] or udp[k] where k is a constant.
769 */
770 if (s->s.code == (BPF_LD|BPF_IMM)) {
771 struct slist *add, *tax, *ild;
772
773 /*
774 * Check that X isn't used on exit from this
775 * block (which the optimizer might cause).
776 * We know the code generator won't generate
777 * any local dependencies.
778 */
779 if (ATOMELEM(b->out_use, X_ATOM))
780 continue;
781
782 /*
783 * Check that the instruction following the ldi
784 * is an addx, or it's an ldxms with an addx
785 * following it (with 0 or more nops between the
786 * ldxms and addx).
787 */
788 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
789 add = next;
790 else
791 add = this_op(next->next);
792 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
793 continue;
794
795 /*
796 * Check that a tax follows that (with 0 or more
797 * nops between them).
798 */
799 tax = this_op(add->next);
800 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
801 continue;
802
803 /*
804 * Check that an ild follows that (with 0 or more
805 * nops between them).
806 */
807 ild = this_op(tax->next);
808 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
809 BPF_MODE(ild->s.code) != BPF_IND)
810 continue;
811 /*
812 * We want to turn this sequence:
813 *
814 * (004) ldi #0x2 {s}
815 * (005) ldxms [14] {next} -- optional
816 * (006) addx {add}
817 * (007) tax {tax}
818 * (008) ild [x+0] {ild}
819 *
820 * into this sequence:
821 *
822 * (004) nop
823 * (005) ldxms [14]
824 * (006) nop
825 * (007) nop
826 * (008) ild [x+2]
827 *
828 * XXX We need to check that X is not
829 * subsequently used, because we want to change
830 * what'll be in it after this sequence.
831 *
832 * We know we can eliminate the accumulator
833 * modifications earlier in the sequence since
834 * it is defined by the last stmt of this sequence
835 * (i.e., the last statement of the sequence loads
836 * a value into the accumulator, so we can eliminate
837 * earlier operations on the accumulator).
838 */
839 ild->s.k += s->s.k;
840 s->s.code = NOP;
841 add->s.code = NOP;
842 tax->s.code = NOP;
843 done = 0;
844 }
845 }
846 /*
847 * If the comparison at the end of a block is an equality
848 * comparison against a constant, and nobody uses the value
849 * we leave in the A register at the end of a block, and
850 * the operation preceding the comparison is an arithmetic
 851	 * operation, we can sometimes optimize it away.
852 */
853 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
854 !ATOMELEM(b->out_use, A_ATOM)) {
855 /*
856 * We can optimize away certain subtractions of the
857 * X register.
858 */
859 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
860 val = b->val[X_ATOM];
861 if (vmap[val].is_const) {
862 /*
863 * If we have a subtract to do a comparison,
864 * and the X register is a known constant,
865 * we can merge this value into the
866 * comparison:
867 *
868 * sub x -> nop
869 * jeq #y jeq #(x+y)
870 */
871 b->s.k += vmap[val].const_val;
872 last->s.code = NOP;
873 done = 0;
874 } else if (b->s.k == 0) {
875 /*
876 * If the X register isn't a constant,
877 * and the comparison in the test is
878 * against 0, we can compare with the
879 * X register, instead:
880 *
881 * sub x -> nop
882 * jeq #0 jeq x
883 */
884 last->s.code = NOP;
885 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
886 done = 0;
887 }
888 }
889 /*
890 * Likewise, a constant subtract can be simplified:
891 *
892 * sub #x -> nop
893 * jeq #y -> jeq #(x+y)
894 */
895 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
896 last->s.code = NOP;
897 b->s.k += last->s.k;
898 done = 0;
899 }
900 /*
901 * And, similarly, a constant AND can be simplified
902 * if we're testing against 0, i.e.:
903 *
904 * and #k nop
905 * jeq #0 -> jset #k
906 */
907 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
908 b->s.k == 0) {
909 b->s.k = last->s.k;
910 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
911 last->s.code = NOP;
912 done = 0;
913 opt_not(b);
914 }
915 }
916 /*
917 * jset #0 -> never
918 * jset #ffffffff -> always
919 */
920 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
921 if (b->s.k == 0)
922 JT(b) = JF(b);
923 if (b->s.k == 0xffffffff)
924 JF(b) = JT(b);
925 }
926 /*
927 * If we're comparing against the index register, and the index
928 * register is a known constant, we can just compare against that
929 * constant.
930 */
931 val = b->val[X_ATOM];
932 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
933 bpf_int32 v = vmap[val].const_val;
934 b->s.code &= ~BPF_X;
935 b->s.k = v;
936 }
937 /*
938 * If the accumulator is a known constant, we can compute the
939 * comparison result.
940 */
941 val = b->val[A_ATOM];
942 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
943 bpf_int32 v = vmap[val].const_val;
944 switch (BPF_OP(b->s.code)) {
945
946 case BPF_JEQ:
947 v = v == b->s.k;
948 break;
949
950 case BPF_JGT:
951 v = (unsigned)v > b->s.k;
952 break;
953
954 case BPF_JGE:
955 v = (unsigned)v >= b->s.k;
956 break;
957
958 case BPF_JSET:
959 v &= b->s.k;
960 break;
961
962 default:
963 abort();
964 }
965 if (JF(b) != JT(b))
966 done = 0;
967 if (v)
968 JF(b) = JT(b);
969 else
970 JT(b) = JF(b);
971 }
972}
973
974/*
 975 * Compute the symbolic value of the expression in 's', and update
976 * anything it defines in the value table 'val'. If 'alter' is true,
977 * do various optimizations. This code would be cleaner if symbolic
978 * evaluation and code transformations weren't folded together.
979 */
980static void
981opt_stmt(s, val, alter)
982 struct stmt *s;
983 int val[];
984 int alter;
985{
986 int op;
987 int v;
988
989 switch (s->code) {
990
991 case BPF_LD|BPF_ABS|BPF_W:
992 case BPF_LD|BPF_ABS|BPF_H:
993 case BPF_LD|BPF_ABS|BPF_B:
994 v = F(s->code, s->k, 0L);
995 vstore(s, &val[A_ATOM], v, alter);
996 break;
997
998 case BPF_LD|BPF_IND|BPF_W:
999 case BPF_LD|BPF_IND|BPF_H:
1000 case BPF_LD|BPF_IND|BPF_B:
1001 v = val[X_ATOM];
1002 if (alter && vmap[v].is_const) {
1003 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1004 s->k += vmap[v].const_val;
1005 v = F(s->code, s->k, 0L);
1006 done = 0;
1007 }
1008 else
1009 v = F(s->code, s->k, v);
1010 vstore(s, &val[A_ATOM], v, alter);
1011 break;
1012
1013 case BPF_LD|BPF_LEN:
1014 v = F(s->code, 0L, 0L);
1015 vstore(s, &val[A_ATOM], v, alter);
1016 break;
1017
1018 case BPF_LD|BPF_IMM:
1019 v = K(s->k);
1020 vstore(s, &val[A_ATOM], v, alter);
1021 break;
1022
1023 case BPF_LDX|BPF_IMM:
1024 v = K(s->k);
1025 vstore(s, &val[X_ATOM], v, alter);
1026 break;
1027
1028 case BPF_LDX|BPF_MSH|BPF_B:
1029 v = F(s->code, s->k, 0L);
1030 vstore(s, &val[X_ATOM], v, alter);
1031 break;
1032
1033 case BPF_ALU|BPF_NEG:
1034 if (alter && vmap[val[A_ATOM]].is_const) {
1035 s->code = BPF_LD|BPF_IMM;
1036 s->k = -vmap[val[A_ATOM]].const_val;
1037 val[A_ATOM] = K(s->k);
1038 }
1039 else
1040 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
1041 break;
1042
1043 case BPF_ALU|BPF_ADD|BPF_K:
1044 case BPF_ALU|BPF_SUB|BPF_K:
1045 case BPF_ALU|BPF_MUL|BPF_K:
1046 case BPF_ALU|BPF_DIV|BPF_K:
1047 case BPF_ALU|BPF_AND|BPF_K:
1048 case BPF_ALU|BPF_OR|BPF_K:
1049 case BPF_ALU|BPF_LSH|BPF_K:
1050 case BPF_ALU|BPF_RSH|BPF_K:
1051 op = BPF_OP(s->code);
1052 if (alter) {
1053 if (s->k == 0) {
1054 /* don't optimize away "sub #0"
1055 * as it may be needed later to
1056 * fixup the generated math code */
1057 if (op == BPF_ADD ||
1058 op == BPF_LSH || op == BPF_RSH ||
1059 op == BPF_OR) {
1060 s->code = NOP;
1061 break;
1062 }
1063 if (op == BPF_MUL || op == BPF_AND) {
1064 s->code = BPF_LD|BPF_IMM;
1065 val[A_ATOM] = K(s->k);
1066 break;
1067 }
1068 }
1069 if (vmap[val[A_ATOM]].is_const) {
1070 fold_op(s, val[A_ATOM], K(s->k));
1071 val[A_ATOM] = K(s->k);
1072 break;
1073 }
1074 }
1075 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1076 break;
1077
1078 case BPF_ALU|BPF_ADD|BPF_X:
1079 case BPF_ALU|BPF_SUB|BPF_X:
1080 case BPF_ALU|BPF_MUL|BPF_X:
1081 case BPF_ALU|BPF_DIV|BPF_X:
1082 case BPF_ALU|BPF_AND|BPF_X:
1083 case BPF_ALU|BPF_OR|BPF_X:
1084 case BPF_ALU|BPF_LSH|BPF_X:
1085 case BPF_ALU|BPF_RSH|BPF_X:
1086 op = BPF_OP(s->code);
1087 if (alter && vmap[val[X_ATOM]].is_const) {
1088 if (vmap[val[A_ATOM]].is_const) {
1089 fold_op(s, val[A_ATOM], val[X_ATOM]);
1090 val[A_ATOM] = K(s->k);
1091 }
1092 else {
1093 s->code = BPF_ALU|BPF_K|op;
1094 s->k = vmap[val[X_ATOM]].const_val;
1095 done = 0;
1096 val[A_ATOM] =
1097 F(s->code, val[A_ATOM], K(s->k));
1098 }
1099 break;
1100 }
1101 /*
1102 * Check if we're doing something to an accumulator
1103 * that is 0, and simplify. This may not seem like
1104 * much of a simplification but it could open up further
1105 * optimizations.
1106 * XXX We could also check for mul by 1, etc.
1107 */
1108 if (alter && vmap[val[A_ATOM]].is_const
1109 && vmap[val[A_ATOM]].const_val == 0) {
1110 if (op == BPF_ADD || op == BPF_OR) {
1111 s->code = BPF_MISC|BPF_TXA;
1112 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1113 break;
1114 }
1115 else if (op == BPF_MUL || op == BPF_DIV ||
1116 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1117 s->code = BPF_LD|BPF_IMM;
1118 s->k = 0;
1119 vstore(s, &val[A_ATOM], K(s->k), alter);
1120 break;
1121 }
1122 else if (op == BPF_NEG) {
1123 s->code = NOP;
1124 break;
1125 }
1126 }
1127 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1128 break;
1129
1130 case BPF_MISC|BPF_TXA:
1131 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1132 break;
1133
1134 case BPF_LD|BPF_MEM:
1135 v = val[s->k];
1136 if (alter && vmap[v].is_const) {
1137 s->code = BPF_LD|BPF_IMM;
1138 s->k = vmap[v].const_val;
1139 done = 0;
1140 }
1141 vstore(s, &val[A_ATOM], v, alter);
1142 break;
1143
1144 case BPF_MISC|BPF_TAX:
1145 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1146 break;
1147
1148 case BPF_LDX|BPF_MEM:
1149 v = val[s->k];
1150 if (alter && vmap[v].is_const) {
1151 s->code = BPF_LDX|BPF_IMM;
1152 s->k = vmap[v].const_val;
1153 done = 0;
1154 }
1155 vstore(s, &val[X_ATOM], v, alter);
1156 break;
1157
1158 case BPF_ST:
1159 vstore(s, &val[s->k], val[A_ATOM], alter);
1160 break;
1161
1162 case BPF_STX:
1163 vstore(s, &val[s->k], val[X_ATOM], alter);
1164 break;
1165 }
1166}
1167
1168static void
1169deadstmt(s, last)
1170 register struct stmt *s;
1171 register struct stmt *last[];
1172{
1173 register int atom;
1174
1175 atom = atomuse(s);
1176 if (atom >= 0) {
1177 if (atom == AX_ATOM) {
1178 last[X_ATOM] = 0;
1179 last[A_ATOM] = 0;
1180 }
1181 else
1182 last[atom] = 0;
1183 }
1184 atom = atomdef(s);
1185 if (atom >= 0) {
1186 if (last[atom]) {
1187 done = 0;
1188 last[atom]->code = NOP;
1189 }
1190 last[atom] = s;
1191 }
1192}
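/*
 * Illustrative: for two consecutive "st M[1]" statements with no
 * intervening use of M[1], the second definition finds last[1] still
 * pointing at the first store and NOPs it as a dead store.
 */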
1193
1194static void
1195opt_deadstores(b)
1196 register struct block *b;
1197{
1198 register struct slist *s;
1199 register int atom;
1200 struct stmt *last[N_ATOMS];
1201
1202 memset((char *)last, 0, sizeof last);
1203
1204 for (s = b->stmts; s != 0; s = s->next)
1205 deadstmt(&s->s, last);
1206 deadstmt(&b->s, last);
1207
1208 for (atom = 0; atom < N_ATOMS; ++atom)
1209 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1210 last[atom]->code = NOP;
1211 done = 0;
1212 }
1213}
1214
1215static void
1216opt_blk(b, do_stmts)
1217 struct block *b;
1218 int do_stmts;
1219{
1220 struct slist *s;
1221 struct edge *p;
1222 int i;
1223 bpf_int32 aval, xval;
1224
1225#if 0
1226 for (s = b->stmts; s && s->next; s = s->next)
1227 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1228 do_stmts = 0;
1229 break;
1230 }
1231#endif
1232
1233 /*
1234 * Initialize the atom values.
1235 */
1236 p = b->in_edges;
1237 if (p == 0) {
1238 /*
1239 * We have no predecessors, so everything is undefined
1240 * upon entry to this block.
1241 */
1242 memset((char *)b->val, 0, sizeof(b->val));
1243 } else {
1244 /*
1245 * Inherit values from our predecessors.
1246 *
1247 * First, get the values from the predecessor along the
1248 * first edge leading to this node.
1249 */
1250 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1251 /*
1252 * Now look at all the other nodes leading to this node.
1253 * If, for the predecessor along that edge, a register
1254 * has a different value from the one we have (i.e.,
1255 * control paths are merging, and the merging paths
1256 * assign different values to that register), give the
1257 * register the undefined value of 0.
1258 */
1259 while ((p = p->next) != NULL) {
1260 for (i = 0; i < N_ATOMS; ++i)
1261 if (b->val[i] != p->pred->val[i])
1262 b->val[i] = 0;
1263 }
1264 }
1265 aval = b->val[A_ATOM];
1266 xval = b->val[X_ATOM];
1267 for (s = b->stmts; s; s = s->next)
1268 opt_stmt(&s->s, b->val, do_stmts);
1269
1270 /*
1271 * This is a special case: if we don't use anything from this
1272 * block, and we load the accumulator or index register with a
1273 * value that is already there, or if this block is a return,
1274 * eliminate all the statements.
1275 *
1276 * XXX - what if it does a store?
1277 *
1278 * XXX - why does it matter whether we use anything from this
1279 * block? If the accumulator or index register doesn't change
1280 * its value, isn't that OK even if we use that value?
1281 *
1282 * XXX - if we load the accumulator with a different value,
1283 * and the block ends with a conditional branch, we obviously
1284 * can't eliminate it, as the branch depends on that value.
1285 * For the index register, the conditional branch only depends
1286 * on the index register value if the test is against the index
1287 * register value rather than a constant; if nothing uses the
1288 * value we put into the index register, and we're not testing
1289 * against the index register's value, and there aren't any
1290 * other problems that would keep us from eliminating this
1291 * block, can we eliminate it?
1292 */
1293 if (do_stmts &&
1294 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1295 xval != 0 && b->val[X_ATOM] == xval) ||
1296 BPF_CLASS(b->s.code) == BPF_RET)) {
1297 if (b->stmts != 0) {
1298 b->stmts = 0;
1299 done = 0;
1300 }
1301 } else {
1302 opt_peep(b);
1303 opt_deadstores(b);
1304 }
1305 /*
1306 * Set up values for branch optimizer.
1307 */
1308 if (BPF_SRC(b->s.code) == BPF_K)
1309 b->oval = K(b->s.k);
1310 else
1311 b->oval = b->val[X_ATOM];
1312 b->et.code = b->s.code;
1313 b->ef.code = -b->s.code;
1314}
1315
1316/*
1317 * Return true if any register that is used on exit from 'succ', has
1318 * an exit value that is different from the corresponding exit value
1319 * from 'b'.
1320 */
1321static int
1322use_conflict(b, succ)
1323 struct block *b, *succ;
1324{
1325 int atom;
1326 atomset use = succ->out_use;
1327
1328 if (use == 0)
1329 return 0;
1330
1331 for (atom = 0; atom < N_ATOMS; ++atom)
1332 if (ATOMELEM(use, atom))
1333 if (b->val[atom] != succ->val[atom])
1334 return 1;
1335 return 0;
1336}
1337
1338static struct block *
1339fold_edge(child, ep)
1340 struct block *child;
1341 struct edge *ep;
1342{
1343 int sense;
1344 int aval0, aval1, oval0, oval1;
1345 int code = ep->code;
1346
1347 if (code < 0) {
1348 code = -code;
1349 sense = 0;
1350 } else
1351 sense = 1;
1352
1353 if (child->s.code != code)
1354 return 0;
1355
1356 aval0 = child->val[A_ATOM];
1357 oval0 = child->oval;
1358 aval1 = ep->pred->val[A_ATOM];
1359 oval1 = ep->pred->oval;
1360
1361 if (aval0 != aval1)
1362 return 0;
1363
1364 if (oval0 == oval1)
1365 /*
1366 * The operands of the branch instructions are
1367 * identical, so the result is true if a true
1368 * branch was taken to get here, otherwise false.
1369 */
1370 return sense ? JT(child) : JF(child);
1371
1372 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1373 /*
1374 * At this point, we only know the comparison if we
1375 * came down the true branch, and it was an equality
1376 * comparison with a constant.
1377 *
1378 * I.e., if we came down the true branch, and the branch
1379 * was an equality comparison with a constant, we know the
1380 * accumulator contains that constant. If we came down
1381 * the false branch, or the comparison wasn't with a
1382 * constant, we don't know what was in the accumulator.
1383 *
1384 * We rely on the fact that distinct constants have distinct
1385 * value numbers.
1386 */
1387 return JF(child);
1388
1389 return 0;
1390}
1391
1392static void
1393opt_j(ep)
1394 struct edge *ep;
1395{
1396 register int i, k;
1397 register struct block *target;
1398
1399 if (JT(ep->succ) == 0)
1400 return;
1401
1402 if (JT(ep->succ) == JF(ep->succ)) {
1403 /*
1404 * Common branch targets can be eliminated, provided
1405 * there is no data dependency.
1406 */
1407 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1408 done = 0;
1409 ep->succ = JT(ep->succ);
1410 }
1411 }
1412 /*
1413 * For each edge dominator that matches the successor of this
 1414	 * edge, promote the edge successor to its grandchild.
1415 *
 1416	 * XXX We violate the set abstraction here in favor of a reasonably
1417 * efficient loop.
1418 */
1419 top:
1420 for (i = 0; i < edgewords; ++i) {
1421 register bpf_u_int32 x = ep->edom[i];
1422
1423 while (x != 0) {
1424 k = ffs(x) - 1;
1425 x &=~ (1 << k);
1426 k += i * BITS_PER_WORD;
1427
1428 target = fold_edge(ep->succ, edges[k]);
1429 /*
1430 * Check that there is no data dependency between
1431 * nodes that will be violated if we move the edge.
1432 */
1433 if (target != 0 && !use_conflict(ep->pred, target)) {
1434 done = 0;
1435 ep->succ = target;
1436 if (JT(target) != 0)
1437 /*
1438 * Start over unless we hit a leaf.
1439 */
1440 goto top;
1441 return;
1442 }
1443 }
1444 }
1445}
1446
1447
1448static void
1449or_pullup(b)
1450 struct block *b;
1451{
1452 int val, at_top;
1453 struct block *pull;
1454 struct block **diffp, **samep;
1455 struct edge *ep;
1456
1457 ep = b->in_edges;
1458 if (ep == 0)
1459 return;
1460
1461 /*
1462 * Make sure each predecessor loads the same value.
1463 * XXX why?
1464 */
1465 val = ep->pred->val[A_ATOM];
1466 for (ep = ep->next; ep != 0; ep = ep->next)
1467 if (val != ep->pred->val[A_ATOM])
1468 return;
1469
1470 if (JT(b->in_edges->pred) == b)
1471 diffp = &JT(b->in_edges->pred);
1472 else
1473 diffp = &JF(b->in_edges->pred);
1474
1475 at_top = 1;
1476 while (1) {
1477 if (*diffp == 0)
1478 return;
1479
1480 if (JT(*diffp) != JT(b))
1481 return;
1482
1483 if (!SET_MEMBER((*diffp)->dom, b->id))
1484 return;
1485
1486 if ((*diffp)->val[A_ATOM] != val)
1487 break;
1488
1489 diffp = &JF(*diffp);
1490 at_top = 0;
1491 }
1492 samep = &JF(*diffp);
1493 while (1) {
1494 if (*samep == 0)
1495 return;
1496
1497 if (JT(*samep) != JT(b))
1498 return;
1499
1500 if (!SET_MEMBER((*samep)->dom, b->id))
1501 return;
1502
1503 if ((*samep)->val[A_ATOM] == val)
1504 break;
1505
1506 /* XXX Need to check that there are no data dependencies
1507 between dp0 and dp1. Currently, the code generator
1508 will not produce such dependencies. */
1509 samep = &JF(*samep);
1510 }
1511#ifdef notdef
1512 /* XXX This doesn't cover everything. */
1513 for (i = 0; i < N_ATOMS; ++i)
1514 if ((*samep)->val[i] != pred->val[i])
1515 return;
1516#endif
1517 /* Pull up the node. */
1518 pull = *samep;
1519 *samep = JF(pull);
1520 JF(pull) = *diffp;
1521
1522 /*
1523 * At the top of the chain, each predecessor needs to point at the
1524 * pulled up node. Inside the chain, there is only one predecessor
1525 * to worry about.
1526 */
1527 if (at_top) {
1528 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1529 if (JT(ep->pred) == b)
1530 JT(ep->pred) = pull;
1531 else
1532 JF(ep->pred) = pull;
1533 }
1534 }
1535 else
1536 *diffp = pull;
1537
1538 done = 0;
1539}
1540
1541static void
1542and_pullup(b)
1543 struct block *b;
1544{
1545 int val, at_top;
1546 struct block *pull;
1547 struct block **diffp, **samep;
1548 struct edge *ep;
1549
1550 ep = b->in_edges;
1551 if (ep == 0)
1552 return;
1553
1554 /*
1555 * Make sure each predecessor loads the same value.
1556 */
1557 val = ep->pred->val[A_ATOM];
1558 for (ep = ep->next; ep != 0; ep = ep->next)
1559 if (val != ep->pred->val[A_ATOM])
1560 return;
1561
1562 if (JT(b->in_edges->pred) == b)
1563 diffp = &JT(b->in_edges->pred);
1564 else
1565 diffp = &JF(b->in_edges->pred);
1566
1567 at_top = 1;
1568 while (1) {
1569 if (*diffp == 0)
1570 return;
1571
1572 if (JF(*diffp) != JF(b))
1573 return;
1574
1575 if (!SET_MEMBER((*diffp)->dom, b->id))
1576 return;
1577
1578 if ((*diffp)->val[A_ATOM] != val)
1579 break;
1580
1581 diffp = &JT(*diffp);
1582 at_top = 0;
1583 }
1584 samep = &JT(*diffp);
1585 while (1) {
1586 if (*samep == 0)
1587 return;
1588
1589 if (JF(*samep) != JF(b))
1590 return;
1591
1592 if (!SET_MEMBER((*samep)->dom, b->id))
1593 return;
1594
1595 if ((*samep)->val[A_ATOM] == val)
1596 break;
1597
1598 /* XXX Need to check that there are no data dependencies
1599 between diffp and samep. Currently, the code generator
1600 will not produce such dependencies. */
1601 samep = &JT(*samep);
1602 }
1603#ifdef notdef
1604 /* XXX This doesn't cover everything. */
1605 for (i = 0; i < N_ATOMS; ++i)
1606 if ((*samep)->val[i] != pred->val[i])
1607 return;
1608#endif
1609 /* Pull up the node. */
1610 pull = *samep;
1611 *samep = JT(pull);
1612 JT(pull) = *diffp;
1613
1614 /*
1615 * At the top of the chain, each predecessor needs to point at the
1616 * pulled up node. Inside the chain, there is only one predecessor
1617 * to worry about.
1618 */
1619 if (at_top) {
1620 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1621 if (JT(ep->pred) == b)
1622 JT(ep->pred) = pull;
1623 else
1624 JF(ep->pred) = pull;
1625 }
1626 }
1627 else
1628 *diffp = pull;
1629
1630 done = 0;
1631}
1632
1633static void
1634opt_blks(root, do_stmts)
1635 struct block *root;
1636 int do_stmts;
1637{
1638 int i, maxlevel;
1639 struct block *p;
1640
1641 init_val();
1642 maxlevel = root->level;
1643
1644 find_inedges(root);
1645 for (i = maxlevel; i >= 0; --i)
1646 for (p = levels[i]; p; p = p->link)
1647 opt_blk(p, do_stmts);
1648
1649 if (do_stmts)
1650 /*
1651 * No point trying to move branches; it can't possibly
1652 * make a difference at this point.
1653 */
1654 return;
1655
1656 for (i = 1; i <= maxlevel; ++i) {
1657 for (p = levels[i]; p; p = p->link) {
1658 opt_j(&p->et);
1659 opt_j(&p->ef);
1660 }
1661 }
1662
1663 find_inedges(root);
1664 for (i = 1; i <= maxlevel; ++i) {
1665 for (p = levels[i]; p; p = p->link) {
1666 or_pullup(p);
1667 and_pullup(p);
1668 }
1669 }
1670}
1671
1672static inline void
1673link_inedge(parent, child)
1674 struct edge *parent;
1675 struct block *child;
1676{
1677 parent->next = child->in_edges;
1678 child->in_edges = parent;
1679}
1680
1681static void
1682find_inedges(root)
1683 struct block *root;
1684{
1685 int i;
1686 struct block *b;
1687
1688 for (i = 0; i < n_blocks; ++i)
1689 blocks[i]->in_edges = 0;
1690
1691 /*
1692 * Traverse the graph, adding each edge to the predecessor
1693 * list of its successors. Skip the leaves (i.e. level 0).
1694 */
1695 for (i = root->level; i > 0; --i) {
1696 for (b = levels[i]; b != 0; b = b->link) {
1697 link_inedge(&b->et, JT(b));
1698 link_inedge(&b->ef, JF(b));
1699 }
1700 }
1701}
1702
1703static void
1704opt_root(b)
1705 struct block **b;
1706{
1707 struct slist *tmp, *s;
1708
1709 s = (*b)->stmts;
1710 (*b)->stmts = 0;
1711 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1712 *b = JT(*b);
1713
1714 tmp = (*b)->stmts;
1715 if (tmp != 0)
1716 sappend(s, tmp);
1717 (*b)->stmts = s;
1718
1719 /*
1720 * If the root node is a return, then there is no
1721 * point executing any statements (since the bpf machine
1722 * has no side effects).
1723 */
1724 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1725 (*b)->stmts = 0;
1726}
1727
1728static void
1729opt_loop(root, do_stmts)
1730 struct block *root;
1731 int do_stmts;
1732{
1733
1734#ifdef BDEBUG
1735 if (dflag > 1) {
1736 printf("opt_loop(root, %d) begin\n", do_stmts);
1737 opt_dump(root);
1738 }
1739#endif
1740 do {
1741 done = 1;
1742 find_levels(root);
1743 find_dom(root);
1744 find_closure(root);
1745 find_ud(root);
1746 find_edom(root);
1747 opt_blks(root, do_stmts);
1748#ifdef BDEBUG
1749 if (dflag > 1) {
1750 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1751 opt_dump(root);
1752 }
1753#endif
1754 } while (!done);
1755}
1756
1757/*
1758 * Optimize the filter code in its dag representation.
1759 */
1760void
1761bpf_optimize(rootp)
1762 struct block **rootp;
1763{
1764 struct block *root;
1765
1766 root = *rootp;
1767
1768 opt_init(root);
1769 opt_loop(root, 0);
1770 opt_loop(root, 1);
1771 intern_blocks(root);
1772#ifdef BDEBUG
1773 if (dflag > 1) {
1774 printf("after intern_blocks()\n");
1775 opt_dump(root);
1776 }
1777#endif
1778 opt_root(rootp);
1779#ifdef BDEBUG
1780 if (dflag > 1) {
1781 printf("after opt_root()\n");
1782 opt_dump(root);
1783 }
1784#endif
1785 opt_cleanup();
1786}
1787
1788static void
1789make_marks(p)
1790 struct block *p;
1791{
1792 if (!isMarked(p)) {
1793 Mark(p);
1794 if (BPF_CLASS(p->s.code) != BPF_RET) {
1795 make_marks(JT(p));
1796 make_marks(JF(p));
1797 }
1798 }
1799}
1800
1801/*
1802 * Mark code array such that isMarked(i) is true
1803 * only for nodes that are alive.
1804 */
1805static void
1806mark_code(p)
1807 struct block *p;
1808{
1809 cur_mark += 1;
1810 make_marks(p);
1811}
1812
1813/*
1814 * True iff the two stmt lists load the same value from the packet into
1815 * the accumulator.
1816 */
1817static int
1818eq_slist(x, y)
1819 struct slist *x, *y;
1820{
1821 while (1) {
1822 while (x && x->s.code == NOP)
1823 x = x->next;
1824 while (y && y->s.code == NOP)
1825 y = y->next;
1826 if (x == 0)
1827 return y == 0;
1828 if (y == 0)
1829 return x == 0;
1830 if (x->s.code != y->s.code || x->s.k != y->s.k)
1831 return 0;
1832 x = x->next;
1833 y = y->next;
1834 }
1835}
1836
1837static inline int
1838eq_blk(b0, b1)
1839 struct block *b0, *b1;
1840{
1841 if (b0->s.code == b1->s.code &&
1842 b0->s.k == b1->s.k &&
1843 b0->et.succ == b1->et.succ &&
1844 b0->ef.succ == b1->ef.succ)
1845 return eq_slist(b0->stmts, b1->stmts);
1846 return 0;
1847}
1848
1849static void
1850intern_blocks(root)
1851 struct block *root;
1852{
1853 struct block *p;
1854 int i, j;
1855 int done1; /* don't shadow global */
1856 top:
1857 done1 = 1;
1858 for (i = 0; i < n_blocks; ++i)
1859 blocks[i]->link = 0;
1860
1861 mark_code(root);
1862
1863 for (i = n_blocks - 1; --i >= 0; ) {
1864 if (!isMarked(blocks[i]))
1865 continue;
1866 for (j = i + 1; j < n_blocks; ++j) {
1867 if (!isMarked(blocks[j]))
1868 continue;
1869 if (eq_blk(blocks[i], blocks[j])) {
1870 blocks[i]->link = blocks[j]->link ?
1871 blocks[j]->link : blocks[j];
1872 break;
1873 }
1874 }
1875 }
1876 for (i = 0; i < n_blocks; ++i) {
1877 p = blocks[i];
1878 if (JT(p) == 0)
1879 continue;
1880 if (JT(p)->link) {
1881 done1 = 0;
1882 JT(p) = JT(p)->link;
1883 }
1884 if (JF(p)->link) {
1885 done1 = 0;
1886 JF(p) = JF(p)->link;
1887 }
1888 }
1889 if (!done1)
1890 goto top;
1891}
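/*
 * Illustrative: filters routinely generate several identical "ret #0"
 * failure blocks; the loops above link each duplicate to a single
 * representative and then redirect every JT/JF pointer to it.
 */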
1892
1893static void
1894opt_cleanup()
1895{
1896 free((void *)vnode_base);
1897 free((void *)vmap);
1898 free((void *)edges);
1899 free((void *)space);
1900 free((void *)levels);
1901 free((void *)blocks);
1902}
1903
1904/*
1905 * Return the number of stmts in 's'.
1906 */
159static int convert_code_r(struct block *);
160#ifdef BDEBUG
161static void opt_dump(struct block *);
162#endif
163
164static int n_blocks;
165struct block **blocks;
166static int n_edges;
167struct edge **edges;
168
169/*
170 * A bit vector set representation of the dominators.
171 * We round up the set size to the next power of two.
172 */
173static int nodewords;
174static int edgewords;
175struct block **levels;
176bpf_u_int32 *space;
177#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
178/*
179 * True if a is in uset {p}
180 */
181#define SET_MEMBER(p, a) \
182((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
183
184/*
185 * Add 'a' to uset p.
186 */
187#define SET_INSERT(p, a) \
188(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
189
190/*
191 * Delete 'a' from uset p.
192 */
193#define SET_DELETE(p, a) \
194(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
195
196/*
197 * a := a intersect b
198 */
199#define SET_INTERSECT(a, b, n)\
200{\
201 register bpf_u_int32 *_x = a, *_y = b;\
202 register int _n = n;\
203 while (--_n >= 0) *_x++ &= *_y++;\
204}
205
206/*
207 * a := a - b
208 */
209#define SET_SUBTRACT(a, b, n)\
210{\
211 register bpf_u_int32 *_x = a, *_y = b;\
212 register int _n = n;\
213 while (--_n >= 0) *_x++ &=~ *_y++;\
214}
215
216/*
217 * a := a union b
218 */
219#define SET_UNION(a, b, n)\
220{\
221 register bpf_u_int32 *_x = a, *_y = b;\
222 register int _n = n;\
223 while (--_n >= 0) *_x++ |= *_y++;\
224}
225
226static uset all_dom_sets;
227static uset all_closure_sets;
228static uset all_edge_sets;
229
230#ifndef MAX
231#define MAX(a,b) ((a)>(b)?(a):(b))
232#endif
233
234static void
235find_levels_r(b)
236 struct block *b;
237{
238 int level;
239
240 if (isMarked(b))
241 return;
242
243 Mark(b);
244 b->link = 0;
245
246 if (JT(b)) {
247 find_levels_r(JT(b));
248 find_levels_r(JF(b));
249 level = MAX(JT(b)->level, JF(b)->level) + 1;
250 } else
251 level = 0;
252 b->level = level;
253 b->link = levels[level];
254 levels[level] = b;
255}
256
257/*
258 * Level graph. The levels go from 0 at the leaves to
259 * N_LEVELS at the root. The levels[] array points to the
260 * first node of the level list, whose elements are linked
261 * with the 'link' field of the struct block.
262 */
263static void
264find_levels(root)
265 struct block *root;
266{
267 memset((char *)levels, 0, n_blocks * sizeof(*levels));
268 unMarkAll();
269 find_levels_r(root);
270}
271
272/*
273 * Find dominator relationships.
274 * Assumes graph has been leveled.
275 */
276static void
277find_dom(root)
278 struct block *root;
279{
280 int i;
281 struct block *b;
282 bpf_u_int32 *x;
283
284 /*
285 * Initialize sets to contain all nodes.
286 */
287 x = all_dom_sets;
288 i = n_blocks * nodewords;
289 while (--i >= 0)
290 *x++ = ~0;
291 /* Root starts off empty. */
292 for (i = nodewords; --i >= 0;)
293 root->dom[i] = 0;
294
295 /* root->level is the highest level no found. */
296 for (i = root->level; i >= 0; --i) {
297 for (b = levels[i]; b; b = b->link) {
298 SET_INSERT(b->dom, b->id);
299 if (JT(b) == 0)
300 continue;
301 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
302 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
303 }
304 }
305}
306
307static void
308propedom(ep)
309 struct edge *ep;
310{
311 SET_INSERT(ep->edom, ep->id);
312 if (ep->succ) {
313 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
314 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
315 }
316}
317
318/*
319 * Compute edge dominators.
320 * Assumes graph has been leveled and predecessors established.
321 */
322static void
323find_edom(root)
324 struct block *root;
325{
326 int i;
327 uset x;
328 struct block *b;
329
330 x = all_edge_sets;
331 for (i = n_edges * edgewords; --i >= 0; )
332 x[i] = ~0;
333
334 /* root->level is the highest level no found. */
335 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
336 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
337 for (i = root->level; i >= 0; --i) {
338 for (b = levels[i]; b != 0; b = b->link) {
339 propedom(&b->et);
340 propedom(&b->ef);
341 }
342 }
343}
344
345/*
346 * Find the backwards transitive closure of the flow graph. These sets
347 * are backwards in the sense that we find the set of nodes that reach
348 * a given node, not the set of nodes that can be reached by a node.
349 *
350 * Assumes graph has been leveled.
351 */
352static void
353find_closure(root)
354 struct block *root;
355{
356 int i;
357 struct block *b;
358
359 /*
360 * Initialize sets to contain no nodes.
361 */
362 memset((char *)all_closure_sets, 0,
363 n_blocks * nodewords * sizeof(*all_closure_sets));
364
365 /* root->level is the highest level no found. */
366 for (i = root->level; i >= 0; --i) {
367 for (b = levels[i]; b; b = b->link) {
368 SET_INSERT(b->closure, b->id);
369 if (JT(b) == 0)
370 continue;
371 SET_UNION(JT(b)->closure, b->closure, nodewords);
372 SET_UNION(JF(b)->closure, b->closure, nodewords);
373 }
374 }
375}
376
377/*
378 * Return the register number that is used by s. If A and X are both
379 * used, return AX_ATOM. If no register is used, return -1.
380 *
381 * The implementation should probably change to an array access.
382 */
383static int
384atomuse(s)
385 struct stmt *s;
386{
387 register int c = s->code;
388
389 if (c == NOP)
390 return -1;
391
392 switch (BPF_CLASS(c)) {
393
394 case BPF_RET:
395 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
396 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
397
398 case BPF_LD:
399 case BPF_LDX:
400 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
401 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
402
403 case BPF_ST:
404 return A_ATOM;
405
406 case BPF_STX:
407 return X_ATOM;
408
409 case BPF_JMP:
410 case BPF_ALU:
411 if (BPF_SRC(c) == BPF_X)
412 return AX_ATOM;
413 return A_ATOM;
414
415 case BPF_MISC:
416 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
417 }
418 abort();
419 /* NOTREACHED */
420}
421
422/*
423 * Return the register number that is defined by 's'. We assume that
424 * a single stmt cannot define more than one register. If no register
425 * is defined, return -1.
426 *
427 * The implementation should probably change to an array access.
428 */
429static int
430atomdef(s)
431 struct stmt *s;
432{
433 if (s->code == NOP)
434 return -1;
435
436 switch (BPF_CLASS(s->code)) {
437
438 case BPF_LD:
439 case BPF_ALU:
440 return A_ATOM;
441
442 case BPF_LDX:
443 return X_ATOM;
444
445 case BPF_ST:
446 case BPF_STX:
447 return s->k;
448
449 case BPF_MISC:
450 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
451 }
452 return -1;
453}
454
455/*
456 * Compute the sets of registers used, defined, and killed by 'b'.
457 *
458 * "Used" means that a statement in 'b' uses the register before any
459 * statement in 'b' defines it, i.e. it uses the value left in
460 * that register by a predecessor block of this block.
461 * "Defined" means that a statement in 'b' defines it.
462 * "Killed" means that a statement in 'b' defines it before any
463 * statement in 'b' uses it, i.e. it kills the value left in that
464 * register by a predecessor block of this block.
465 */
466static void
467compute_local_ud(b)
468 struct block *b;
469{
470 struct slist *s;
471 atomset def = 0, use = 0, kill = 0;
472 int atom;
473
474 for (s = b->stmts; s; s = s->next) {
475 if (s->s.code == NOP)
476 continue;
477 atom = atomuse(&s->s);
478 if (atom >= 0) {
479 if (atom == AX_ATOM) {
480 if (!ATOMELEM(def, X_ATOM))
481 use |= ATOMMASK(X_ATOM);
482 if (!ATOMELEM(def, A_ATOM))
483 use |= ATOMMASK(A_ATOM);
484 }
485 else if (atom < N_ATOMS) {
486 if (!ATOMELEM(def, atom))
487 use |= ATOMMASK(atom);
488 }
489 else
490 abort();
491 }
492 atom = atomdef(&s->s);
493 if (atom >= 0) {
494 if (!ATOMELEM(use, atom))
495 kill |= ATOMMASK(atom);
496 def |= ATOMMASK(atom);
497 }
498 }
499 if (BPF_CLASS(b->s.code) == BPF_JMP) {
500 /*
501 * XXX - what about RET?
502 */
503 atom = atomuse(&b->s);
504 if (atom >= 0) {
505 if (atom == AX_ATOM) {
506 if (!ATOMELEM(def, X_ATOM))
507 use |= ATOMMASK(X_ATOM);
508 if (!ATOMELEM(def, A_ATOM))
509 use |= ATOMMASK(A_ATOM);
510 }
511 else if (atom < N_ATOMS) {
512 if (!ATOMELEM(def, atom))
513 use |= ATOMMASK(atom);
514 }
515 else
516 abort();
517 }
518 }
519
520 b->def = def;
521 b->kill = kill;
522 b->in_use = use;
523}
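
/*
 * Worked example (illustrative): for a block whose statements are
 *
 *	tax		uses A before any definition, defines X
 *	ld #1		defines A
 *
 * the result is in_use = {A}, def = {A, X}, kill = {X}; X is killed
 * because it is defined before any use, while A is not, because the
 * block first consumes the value left by a predecessor.
 */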
524
525/*
526 * Assume graph is already leveled.
527 */
528static void
529find_ud(root)
530 struct block *root;
531{
532 int i, maxlevel;
533 struct block *p;
534
535 /*
536 * root->level is the highest level number found;
537 * count down from there.
538 */
539 maxlevel = root->level;
540 for (i = maxlevel; i >= 0; --i)
541 for (p = levels[i]; p; p = p->link) {
542 compute_local_ud(p);
543 p->out_use = 0;
544 }
545
546 for (i = 1; i <= maxlevel; ++i) {
547 for (p = levels[i]; p; p = p->link) {
548 p->out_use |= JT(p)->in_use | JF(p)->in_use;
549 p->in_use |= p->out_use &~ p->kill;
550 }
551 }
552}
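
/*
 * In dataflow terms, the second loop above solves the usual backwards
 * liveness equations
 *
 *	out_use(b) = in_use(JT(b)) | in_use(JF(b))
 *	in_use(b) |= out_use(b) & ~kill(b)
 *
 * in a single pass; one pass suffices because the flow graph is
 * acyclic and is visited from the leaves up, level by level.
 */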
553
554/*
555 * These data structures are used in a Cocke and Schwartz style
556 * value numbering scheme. Since the flowgraph is acyclic,
557 * exit values can be propagated from a node's predecessors
558 * provided the value is uniquely defined.
559 */
560struct valnode {
561 int code;
562 int v0, v1;
563 int val;
564 struct valnode *next;
565};
566
567#define MODULUS 213
568static struct valnode *hashtbl[MODULUS];
569static int curval;
570static int maxval;
571
572/* Integer constants mapped with the load immediate opcode. */
573#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
574
575struct vmapinfo {
576 int is_const;
577 bpf_int32 const_val;
578};
579
580struct vmapinfo *vmap;
581struct valnode *vnode_base;
582struct valnode *next_vnode;
583
584static void
585init_val()
586{
587 curval = 0;
588 next_vnode = vnode_base;
589 memset((char *)vmap, 0, maxval * sizeof(*vmap));
590 memset((char *)hashtbl, 0, sizeof hashtbl);
591}
592
593/* Because we really don't have an IR, this stuff is a little messy. */
594static int
595F(code, v0, v1)
596 int code;
597 int v0, v1;
598{
599 u_int hash;
600 int val;
601 struct valnode *p;
602
603 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
604 hash %= MODULUS;
605
606 for (p = hashtbl[hash]; p; p = p->next)
607 if (p->code == code && p->v0 == v0 && p->v1 == v1)
608 return p->val;
609
610 val = ++curval;
611 if (BPF_MODE(code) == BPF_IMM &&
612 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
613 vmap[val].const_val = v0;
614 vmap[val].is_const = 1;
615 }
616 p = next_vnode++;
617 p->val = val;
618 p->code = code;
619 p->v0 = v0;
620 p->v1 = v1;
621 p->next = hashtbl[hash];
622 hashtbl[hash] = p;
623
624 return val;
625}
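
/*
 * For example (illustrative): two occurrences of "ld [12]" present
 * the same (code, v0, v1) triple to F() and so receive the same value
 * number; vstore() can then turn the second load into a NOP when the
 * accumulator is known to hold that value already. Loads of
 * constants additionally set is_const in vmap[], which is what later
 * enables fold_op() and the constant tests in opt_peep().
 */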
626
627static inline void
628vstore(s, valp, newval, alter)
629 struct stmt *s;
630 int *valp;
631 int newval;
632 int alter;
633{
634 if (alter && *valp == newval)
635 s->code = NOP;
636 else
637 *valp = newval;
638}
639
640static void
641fold_op(s, v0, v1)
642 struct stmt *s;
643 int v0, v1;
644{
645 bpf_u_int32 a, b;
646
647 a = vmap[v0].const_val;
648 b = vmap[v1].const_val;
649
650 switch (BPF_OP(s->code)) {
651 case BPF_ADD:
652 a += b;
653 break;
654
655 case BPF_SUB:
656 a -= b;
657 break;
658
659 case BPF_MUL:
660 a *= b;
661 break;
662
663 case BPF_DIV:
664 if (b == 0)
665 bpf_error("division by zero");
666 a /= b;
667 break;
668
669 case BPF_AND:
670 a &= b;
671 break;
672
673 case BPF_OR:
674 a |= b;
675 break;
676
677 case BPF_LSH:
678 a <<= b;
679 break;
680
681 case BPF_RSH:
682 a >>= b;
683 break;
684
685 case BPF_NEG:
686 a = -a;
687 break;
688
689 default:
690 abort();
691 }
692 s->k = a;
693 s->code = BPF_LD|BPF_IMM;
694 done = 0;
695}
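
/*
 * For example (illustrative): if value numbering has established that
 * the accumulator holds the constant 2, then
 *
 *	add #3	->	ld #5
 *
 * i.e. the ALU operation is evaluated here at compile time and the
 * statement is rewritten as a load immediate of the result.
 */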
696
697static inline struct slist *
698this_op(s)
699 struct slist *s;
700{
701 while (s != 0 && s->s.code == NOP)
702 s = s->next;
703 return s;
704}
705
706static void
707opt_not(b)
708 struct block *b;
709{
710 struct block *tmp = JT(b);
711
712 JT(b) = JF(b);
713 JF(b) = tmp;
714}
715
716static void
717opt_peep(b)
718 struct block *b;
719{
720 struct slist *s;
721 struct slist *next, *last;
722 int val;
723
724 s = b->stmts;
725 if (s == 0)
726 return;
727
728 last = s;
729 for (/*empty*/; /*empty*/; s = next) {
730 /*
731 * Skip over nops.
732 */
733 s = this_op(s);
734 if (s == 0)
735 break; /* nothing left in the block */
736
737 /*
738 * Find the next real instruction after that one
739 * (skipping nops).
740 */
741 next = this_op(s->next);
742 if (next == 0)
743 break; /* no next instruction */
744 last = next;
745
746 /*
747 * st M[k] --> st M[k]
748 * ldx M[k] tax
749 */
750 if (s->s.code == BPF_ST &&
751 next->s.code == (BPF_LDX|BPF_MEM) &&
752 s->s.k == next->s.k) {
753 done = 0;
754 next->s.code = BPF_MISC|BPF_TAX;
755 }
756 /*
757 * ld #k --> ldx #k
758 * tax txa
759 */
760 if (s->s.code == (BPF_LD|BPF_IMM) &&
761 next->s.code == (BPF_MISC|BPF_TAX)) {
762 s->s.code = BPF_LDX|BPF_IMM;
763 next->s.code = BPF_MISC|BPF_TXA;
764 done = 0;
765 }
766 /*
767 * This is an ugly special case, but it happens
768 * when you say tcp[k] or udp[k] where k is a constant.
769 */
770 if (s->s.code == (BPF_LD|BPF_IMM)) {
771 struct slist *add, *tax, *ild;
772
773 /*
774 * Check that X isn't used on exit from this
775 * block (which the optimizer might cause).
776 * We know the code generator won't generate
777 * any local dependencies.
778 */
779 if (ATOMELEM(b->out_use, X_ATOM))
780 continue;
781
782 /*
783 * Check that the instruction following the ldi
784 * is an addx, or it's an ldxms with an addx
785 * following it (with 0 or more nops between the
786 * ldxms and addx).
787 */
788 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
789 add = next;
790 else
791 add = this_op(next->next);
792 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
793 continue;
794
795 /*
796 * Check that a tax follows that (with 0 or more
797 * nops between them).
798 */
799 tax = this_op(add->next);
800 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
801 continue;
802
803 /*
804 * Check that an ild follows that (with 0 or more
805 * nops between them).
806 */
807 ild = this_op(tax->next);
808 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
809 BPF_MODE(ild->s.code) != BPF_IND)
810 continue;
811 /*
812 * We want to turn this sequence:
813 *
814 * (004) ldi #0x2 {s}
815 * (005) ldxms [14] {next} -- optional
816 * (006) addx {add}
817 * (007) tax {tax}
818 * (008) ild [x+0] {ild}
819 *
820 * into this sequence:
821 *
822 * (004) nop
823 * (005) ldxms [14]
824 * (006) nop
825 * (007) nop
826 * (008) ild [x+2]
827 *
828 * XXX We need to check that X is not
829 * subsequently used, because we want to change
830 * what'll be in it after this sequence.
831 *
832 * We know we can eliminate the accumulator
833 * modifications earlier in the sequence since
834 * it is defined by the last stmt of this sequence
835 * (i.e., the last statement of the sequence loads
836 * a value into the accumulator, so we can eliminate
837 * earlier operations on the accumulator).
838 */
839 ild->s.k += s->s.k;
840 s->s.code = NOP;
841 add->s.code = NOP;
842 tax->s.code = NOP;
843 done = 0;
844 }
845 }
846 /*
847 * If the comparison at the end of a block is an equality
848 * comparison against a constant, and nobody uses the value
849 * we leave in the A register at the end of a block, and
850 * the operation preceding the comparison is an arithmetic
851 * operation, we can sometimes optimize it away.
852 */
853 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
854 !ATOMELEM(b->out_use, A_ATOM)) {
855 /*
856 * We can optimize away certain subtractions of the
857 * X register.
858 */
859 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
860 val = b->val[X_ATOM];
861 if (vmap[val].is_const) {
862 /*
863 * If we have a subtract to do a comparison,
864 * and the X register is a known constant,
865 * we can merge this value into the
866 * comparison:
867 *
868 * sub x -> nop
869 * jeq #y jeq #(x+y)
870 */
871 b->s.k += vmap[val].const_val;
872 last->s.code = NOP;
873 done = 0;
874 } else if (b->s.k == 0) {
875 /*
876 * If the X register isn't a constant,
877 * and the comparison in the test is
878 * against 0, we can compare with the
879 * X register, instead:
880 *
881 * sub x -> nop
882 * jeq #0 jeq x
883 */
884 last->s.code = NOP;
885 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
886 done = 0;
887 }
888 }
889 /*
890 * Likewise, a constant subtract can be simplified:
891 *
892 * sub #x -> nop
893 * jeq #y -> jeq #(x+y)
894 */
895 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
896 last->s.code = NOP;
897 b->s.k += last->s.k;
898 done = 0;
899 }
900 /*
901 * And, similarly, a constant AND can be simplified
902 * if we're testing against 0, i.e.:
903 *
904 * and #k nop
905 * jeq #0 -> jset #k
906 */
907 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
908 b->s.k == 0) {
909 b->s.k = last->s.k;
910 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
911 last->s.code = NOP;
912 done = 0;
913 opt_not(b);
914 }
915 }
916 /*
917 * jset #0 -> never
918 * jset #ffffffff -> always
919 */
920 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
921 if (b->s.k == 0)
922 JT(b) = JF(b);
923 if (b->s.k == 0xffffffff)
924 JF(b) = JT(b);
925 }
926 /*
927 * If we're comparing against the index register, and the index
928 * register is a known constant, we can just compare against that
929 * constant.
930 */
931 val = b->val[X_ATOM];
932 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
933 bpf_int32 v = vmap[val].const_val;
934 b->s.code &= ~BPF_X;
935 b->s.k = v;
936 }
937 /*
938 * If the accumulator is a known constant, we can compute the
939 * comparison result.
940 */
941 val = b->val[A_ATOM];
942 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
943 bpf_int32 v = vmap[val].const_val;
944 switch (BPF_OP(b->s.code)) {
945
946 case BPF_JEQ:
947 v = v == b->s.k;
948 break;
949
950 case BPF_JGT:
951 v = (unsigned)v > b->s.k;
952 break;
953
954 case BPF_JGE:
955 v = (unsigned)v >= b->s.k;
956 break;
957
958 case BPF_JSET:
959 v &= b->s.k;
960 break;
961
962 default:
963 abort();
964 }
965 if (JF(b) != JT(b))
966 done = 0;
967 if (v)
968 JF(b) = JT(b);
969 else
970 JT(b) = JF(b);
971 }
972}
973
974/*
975 * Compute the symbolic value of the expression 's', and update
976 * anything it defines in the value table 'val'. If 'alter' is true,
977 * do various optimizations. This code would be cleaner if symbolic
978 * evaluation and code transformations weren't folded together.
979 */
980static void
981opt_stmt(s, val, alter)
982 struct stmt *s;
983 int val[];
984 int alter;
985{
986 int op;
987 int v;
988
989 switch (s->code) {
990
991 case BPF_LD|BPF_ABS|BPF_W:
992 case BPF_LD|BPF_ABS|BPF_H:
993 case BPF_LD|BPF_ABS|BPF_B:
994 v = F(s->code, s->k, 0L);
995 vstore(s, &val[A_ATOM], v, alter);
996 break;
997
998 case BPF_LD|BPF_IND|BPF_W:
999 case BPF_LD|BPF_IND|BPF_H:
1000 case BPF_LD|BPF_IND|BPF_B:
1001 v = val[X_ATOM];
1002 if (alter && vmap[v].is_const) {
1003 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1004 s->k += vmap[v].const_val;
1005 v = F(s->code, s->k, 0L);
1006 done = 0;
1007 }
1008 else
1009 v = F(s->code, s->k, v);
1010 vstore(s, &val[A_ATOM], v, alter);
1011 break;
1012
1013 case BPF_LD|BPF_LEN:
1014 v = F(s->code, 0L, 0L);
1015 vstore(s, &val[A_ATOM], v, alter);
1016 break;
1017
1018 case BPF_LD|BPF_IMM:
1019 v = K(s->k);
1020 vstore(s, &val[A_ATOM], v, alter);
1021 break;
1022
1023 case BPF_LDX|BPF_IMM:
1024 v = K(s->k);
1025 vstore(s, &val[X_ATOM], v, alter);
1026 break;
1027
1028 case BPF_LDX|BPF_MSH|BPF_B:
1029 v = F(s->code, s->k, 0L);
1030 vstore(s, &val[X_ATOM], v, alter);
1031 break;
1032
1033 case BPF_ALU|BPF_NEG:
1034 if (alter && vmap[val[A_ATOM]].is_const) {
1035 s->code = BPF_LD|BPF_IMM;
1036 s->k = -vmap[val[A_ATOM]].const_val;
1037 val[A_ATOM] = K(s->k);
1038 }
1039 else
1040 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
1041 break;
1042
1043 case BPF_ALU|BPF_ADD|BPF_K:
1044 case BPF_ALU|BPF_SUB|BPF_K:
1045 case BPF_ALU|BPF_MUL|BPF_K:
1046 case BPF_ALU|BPF_DIV|BPF_K:
1047 case BPF_ALU|BPF_AND|BPF_K:
1048 case BPF_ALU|BPF_OR|BPF_K:
1049 case BPF_ALU|BPF_LSH|BPF_K:
1050 case BPF_ALU|BPF_RSH|BPF_K:
1051 op = BPF_OP(s->code);
1052 if (alter) {
1053 if (s->k == 0) {
1054 /* don't optimize away "sub #0"
1055 * as it may be needed later to
1056 * fixup the generated math code */
1057 if (op == BPF_ADD ||
1058 op == BPF_LSH || op == BPF_RSH ||
1059 op == BPF_OR) {
1060 s->code = NOP;
1061 break;
1062 }
1063 if (op == BPF_MUL || op == BPF_AND) {
1064 s->code = BPF_LD|BPF_IMM;
1065 val[A_ATOM] = K(s->k);
1066 break;
1067 }
1068 }
1069 if (vmap[val[A_ATOM]].is_const) {
1070 fold_op(s, val[A_ATOM], K(s->k));
1071 val[A_ATOM] = K(s->k);
1072 break;
1073 }
1074 }
1075 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1076 break;
1077
1078 case BPF_ALU|BPF_ADD|BPF_X:
1079 case BPF_ALU|BPF_SUB|BPF_X:
1080 case BPF_ALU|BPF_MUL|BPF_X:
1081 case BPF_ALU|BPF_DIV|BPF_X:
1082 case BPF_ALU|BPF_AND|BPF_X:
1083 case BPF_ALU|BPF_OR|BPF_X:
1084 case BPF_ALU|BPF_LSH|BPF_X:
1085 case BPF_ALU|BPF_RSH|BPF_X:
1086 op = BPF_OP(s->code);
1087 if (alter && vmap[val[X_ATOM]].is_const) {
1088 if (vmap[val[A_ATOM]].is_const) {
1089 fold_op(s, val[A_ATOM], val[X_ATOM]);
1090 val[A_ATOM] = K(s->k);
1091 }
1092 else {
1093 s->code = BPF_ALU|BPF_K|op;
1094 s->k = vmap[val[X_ATOM]].const_val;
1095 done = 0;
1096 val[A_ATOM] =
1097 F(s->code, val[A_ATOM], K(s->k));
1098 }
1099 break;
1100 }
1101 /*
1102 * Check if we're doing something to an accumulator
1103 * that is 0, and simplify. This may not seem like
1104 * much of a simplification but it could open up further
1105 * optimizations.
1106 * XXX We could also check for mul by 1, etc.
1107 */
1108 if (alter && vmap[val[A_ATOM]].is_const
1109 && vmap[val[A_ATOM]].const_val == 0) {
1110 if (op == BPF_ADD || op == BPF_OR) {
1111 s->code = BPF_MISC|BPF_TXA;
1112 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1113 break;
1114 }
1115 else if (op == BPF_MUL || op == BPF_DIV ||
1116 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1117 s->code = BPF_LD|BPF_IMM;
1118 s->k = 0;
1119 vstore(s, &val[A_ATOM], K(s->k), alter);
1120 break;
1121 }
1122 else if (op == BPF_NEG) {
1123 s->code = NOP;
1124 break;
1125 }
1126 }
1127 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1128 break;
1129
1130 case BPF_MISC|BPF_TXA:
1131 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1132 break;
1133
1134 case BPF_LD|BPF_MEM:
1135 v = val[s->k];
1136 if (alter && vmap[v].is_const) {
1137 s->code = BPF_LD|BPF_IMM;
1138 s->k = vmap[v].const_val;
1139 done = 0;
1140 }
1141 vstore(s, &val[A_ATOM], v, alter);
1142 break;
1143
1144 case BPF_MISC|BPF_TAX:
1145 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1146 break;
1147
1148 case BPF_LDX|BPF_MEM:
1149 v = val[s->k];
1150 if (alter && vmap[v].is_const) {
1151 s->code = BPF_LDX|BPF_IMM;
1152 s->k = vmap[v].const_val;
1153 done = 0;
1154 }
1155 vstore(s, &val[X_ATOM], v, alter);
1156 break;
1157
1158 case BPF_ST:
1159 vstore(s, &val[s->k], val[A_ATOM], alter);
1160 break;
1161
1162 case BPF_STX:
1163 vstore(s, &val[s->k], val[X_ATOM], alter);
1164 break;
1165 }
1166}
1167
1168static void
1169deadstmt(s, last)
1170 register struct stmt *s;
1171 register struct stmt *last[];
1172{
1173 register int atom;
1174
1175 atom = atomuse(s);
1176 if (atom >= 0) {
1177 if (atom == AX_ATOM) {
1178 last[X_ATOM] = 0;
1179 last[A_ATOM] = 0;
1180 }
1181 else
1182 last[atom] = 0;
1183 }
1184 atom = atomdef(s);
1185 if (atom >= 0) {
1186 if (last[atom]) {
1187 done = 0;
1188 last[atom]->code = NOP;
1189 }
1190 last[atom] = s;
1191 }
1192}
1193
1194static void
1195opt_deadstores(b)
1196 register struct block *b;
1197{
1198 register struct slist *s;
1199 register int atom;
1200 struct stmt *last[N_ATOMS];
1201
1202 memset((char *)last, 0, sizeof last);
1203
1204 for (s = b->stmts; s != 0; s = s->next)
1205 deadstmt(&s->s, last);
1206 deadstmt(&b->s, last);
1207
1208 for (atom = 0; atom < N_ATOMS; ++atom)
1209 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1210 last[atom]->code = NOP;
1211 done = 0;
1212 }
1213}
1214
1215static void
1216opt_blk(b, do_stmts)
1217 struct block *b;
1218 int do_stmts;
1219{
1220 struct slist *s;
1221 struct edge *p;
1222 int i;
1223 bpf_int32 aval, xval;
1224
1225#if 0
1226 for (s = b->stmts; s && s->next; s = s->next)
1227 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1228 do_stmts = 0;
1229 break;
1230 }
1231#endif
1232
1233 /*
1234 * Initialize the atom values.
1235 */
1236 p = b->in_edges;
1237 if (p == 0) {
1238 /*
1239 * We have no predecessors, so everything is undefined
1240 * upon entry to this block.
1241 */
1242 memset((char *)b->val, 0, sizeof(b->val));
1243 } else {
1244 /*
1245 * Inherit values from our predecessors.
1246 *
1247 * First, get the values from the predecessor along the
1248 * first edge leading to this node.
1249 */
1250 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1251 /*
1252 * Now look at all the other nodes leading to this node.
1253 * If, for the predecessor along that edge, a register
1254 * has a different value from the one we have (i.e.,
1255 * control paths are merging, and the merging paths
1256 * assign different values to that register), give the
1257 * register the undefined value of 0.
1258 */
1259 while ((p = p->next) != NULL) {
1260 for (i = 0; i < N_ATOMS; ++i)
1261 if (b->val[i] != p->pred->val[i])
1262 b->val[i] = 0;
1263 }
1264 }
1265 aval = b->val[A_ATOM];
1266 xval = b->val[X_ATOM];
1267 for (s = b->stmts; s; s = s->next)
1268 opt_stmt(&s->s, b->val, do_stmts);
1269
1270 /*
1271 * This is a special case: if we don't use anything from this
1272 * block, and we load the accumulator or index register with a
1273 * value that is already there, or if this block is a return,
1274 * eliminate all the statements.
1275 *
1276 * XXX - what if it does a store?
1277 *
1278 * XXX - why does it matter whether we use anything from this
1279 * block? If the accumulator or index register doesn't change
1280 * its value, isn't that OK even if we use that value?
1281 *
1282 * XXX - if we load the accumulator with a different value,
1283 * and the block ends with a conditional branch, we obviously
1284 * can't eliminate it, as the branch depends on that value.
1285 * For the index register, the conditional branch only depends
1286 * on the index register value if the test is against the index
1287 * register value rather than a constant; if nothing uses the
1288 * value we put into the index register, and we're not testing
1289 * against the index register's value, and there aren't any
1290 * other problems that would keep us from eliminating this
1291 * block, can we eliminate it?
1292 */
1293 if (do_stmts &&
1294 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1295 xval != 0 && b->val[X_ATOM] == xval) ||
1296 BPF_CLASS(b->s.code) == BPF_RET)) {
1297 if (b->stmts != 0) {
1298 b->stmts = 0;
1299 done = 0;
1300 }
1301 } else {
1302 opt_peep(b);
1303 opt_deadstores(b);
1304 }
1305 /*
1306 * Set up values for branch optimizer.
1307 */
1308 if (BPF_SRC(b->s.code) == BPF_K)
1309 b->oval = K(b->s.k);
1310 else
1311 b->oval = b->val[X_ATOM];
1312 b->et.code = b->s.code;
1313 b->ef.code = -b->s.code;
1314}
1315
1316/*
1317 * Return true if any register that is used on exit from 'succ', has
1318 * an exit value that is different from the corresponding exit value
1319 * from 'b'.
1320 */
1321static int
1322use_conflict(b, succ)
1323 struct block *b, *succ;
1324{
1325 int atom;
1326 atomset use = succ->out_use;
1327
1328 if (use == 0)
1329 return 0;
1330
1331 for (atom = 0; atom < N_ATOMS; ++atom)
1332 if (ATOMELEM(use, atom))
1333 if (b->val[atom] != succ->val[atom])
1334 return 1;
1335 return 0;
1336}
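
/*
 * That is, retargeting an edge from 'b' straight past 'succ' is only
 * safe when every register live on exit from 'succ' holds the same
 * value along both paths; otherwise the move would change what some
 * later block observes.
 */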
1337
1338static struct block *
1339fold_edge(child, ep)
1340 struct block *child;
1341 struct edge *ep;
1342{
1343 int sense;
1344 int aval0, aval1, oval0, oval1;
1345 int code = ep->code;
1346
1347 if (code < 0) {
1348 code = -code;
1349 sense = 0;
1350 } else
1351 sense = 1;
1352
1353 if (child->s.code != code)
1354 return 0;
1355
1356 aval0 = child->val[A_ATOM];
1357 oval0 = child->oval;
1358 aval1 = ep->pred->val[A_ATOM];
1359 oval1 = ep->pred->oval;
1360
1361 if (aval0 != aval1)
1362 return 0;
1363
1364 if (oval0 == oval1)
1365 /*
1366 * The operands of the branch instructions are
1367 * identical, so the result is true if a true
1368 * branch was taken to get here, otherwise false.
1369 */
1370 return sense ? JT(child) : JF(child);
1371
1372 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1373 /*
1374 * At this point, we only know the comparison if we
1375 * came down the true branch, and it was an equality
1376 * comparison with a constant.
1377 *
1378 * I.e., if we came down the true branch, and the branch
1379 * was an equality comparison with a constant, we know the
1380 * accumulator contains that constant. If we came down
1381 * the false branch, or the comparison wasn't with a
1382 * constant, we don't know what was in the accumulator.
1383 *
1384 * We rely on the fact that distinct constants have distinct
1385 * value numbers.
1386 */
1387 return JF(child);
1388
1389 return 0;
1390}
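
/*
 * For example (illustrative): a filter such as "ip and tcp" can test
 * "jeq #0x800" against the same accumulator value in two blocks in a
 * row. If the second test is reached along the true edge of the
 * first, fold_edge() returns JT(child), letting opt_j() bypass the
 * redundant comparison.
 */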
1391
1392static void
1393opt_j(ep)
1394 struct edge *ep;
1395{
1396 register int i, k;
1397 register struct block *target;
1398
1399 if (JT(ep->succ) == 0)
1400 return;
1401
1402 if (JT(ep->succ) == JF(ep->succ)) {
1403 /*
1404 * Common branch targets can be eliminated, provided
1405 * there is no data dependency.
1406 */
1407 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1408 done = 0;
1409 ep->succ = JT(ep->succ);
1410 }
1411 }
1412 /*
1413 * For each edge dominator that matches the successor of this
1414 * edge, promote the edge successor to its grandchild.
1415 *
1416 * XXX We violate the set abstraction here in favor of a reasonably
1417 * efficient loop.
1418 */
1419 top:
1420 for (i = 0; i < edgewords; ++i) {
1421 register bpf_u_int32 x = ep->edom[i];
1422
1423 while (x != 0) {
1424 k = ffs(x) - 1;
1425 x &=~ (1 << k);
1426 k += i * BITS_PER_WORD;
1427
1428 target = fold_edge(ep->succ, edges[k]);
1429 /*
1430 * Check that there is no data dependency between
1431 * nodes that will be violated if we move the edge.
1432 */
1433 if (target != 0 && !use_conflict(ep->pred, target)) {
1434 done = 0;
1435 ep->succ = target;
1436 if (JT(target) != 0)
1437 /*
1438 * Start over unless we hit a leaf.
1439 */
1440 goto top;
1441 return;
1442 }
1443 }
1444 }
1445}
1446
1447
1448static void
1449or_pullup(b)
1450 struct block *b;
1451{
1452 int val, at_top;
1453 struct block *pull;
1454 struct block **diffp, **samep;
1455 struct edge *ep;
1456
1457 ep = b->in_edges;
1458 if (ep == 0)
1459 return;
1460
1461 /*
1462 * Make sure each predecessor loads the same value.
1463 * XXX why?
1464 */
1465 val = ep->pred->val[A_ATOM];
1466 for (ep = ep->next; ep != 0; ep = ep->next)
1467 if (val != ep->pred->val[A_ATOM])
1468 return;
1469
1470 if (JT(b->in_edges->pred) == b)
1471 diffp = &JT(b->in_edges->pred);
1472 else
1473 diffp = &JF(b->in_edges->pred);
1474
1475 at_top = 1;
1476 while (1) {
1477 if (*diffp == 0)
1478 return;
1479
1480 if (JT(*diffp) != JT(b))
1481 return;
1482
1483 if (!SET_MEMBER((*diffp)->dom, b->id))
1484 return;
1485
1486 if ((*diffp)->val[A_ATOM] != val)
1487 break;
1488
1489 diffp = &JF(*diffp);
1490 at_top = 0;
1491 }
1492 samep = &JF(*diffp);
1493 while (1) {
1494 if (*samep == 0)
1495 return;
1496
1497 if (JT(*samep) != JT(b))
1498 return;
1499
1500 if (!SET_MEMBER((*samep)->dom, b->id))
1501 return;
1502
1503 if ((*samep)->val[A_ATOM] == val)
1504 break;
1505
1506 /* XXX Need to check that there are no data dependencies
1507 between diffp and samep. Currently, the code generator
1508 will not produce such dependencies. */
1509 samep = &JF(*samep);
1510 }
1511#ifdef notdef
1512 /* XXX This doesn't cover everything. */
1513 for (i = 0; i < N_ATOMS; ++i)
1514 if ((*samep)->val[i] != pred->val[i])
1515 return;
1516#endif
1517 /* Pull up the node. */
1518 pull = *samep;
1519 *samep = JF(pull);
1520 JF(pull) = *diffp;
1521
1522 /*
1523 * At the top of the chain, each predecessor needs to point at the
1524 * pulled up node. Inside the chain, there is only one predecessor
1525 * to worry about.
1526 */
1527 if (at_top) {
1528 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1529 if (JT(ep->pred) == b)
1530 JT(ep->pred) = pull;
1531 else
1532 JF(ep->pred) = pull;
1533 }
1534 }
1535 else
1536 *diffp = pull;
1537
1538 done = 0;
1539}
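
/*
 * Sketch of the transformation: the chain examined above is a run of
 * blocks linked by false edges that share b's true target (an "or"
 * chain). The first loop skips the prefix of the chain whose
 * accumulator value matches b's predecessors; the second finds a
 * later block where the value matches again and splices it up
 * adjacent to that prefix, so equivalent tests become neighbors that
 * opt_j() can then fold.
 */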
1540
1541static void
1542and_pullup(b)
1543 struct block *b;
1544{
1545 int val, at_top;
1546 struct block *pull;
1547 struct block **diffp, **samep;
1548 struct edge *ep;
1549
1550 ep = b->in_edges;
1551 if (ep == 0)
1552 return;
1553
1554 /*
1555 * Make sure each predecessor loads the same value.
1556 */
1557 val = ep->pred->val[A_ATOM];
1558 for (ep = ep->next; ep != 0; ep = ep->next)
1559 if (val != ep->pred->val[A_ATOM])
1560 return;
1561
1562 if (JT(b->in_edges->pred) == b)
1563 diffp = &JT(b->in_edges->pred);
1564 else
1565 diffp = &JF(b->in_edges->pred);
1566
1567 at_top = 1;
1568 while (1) {
1569 if (*diffp == 0)
1570 return;
1571
1572 if (JF(*diffp) != JF(b))
1573 return;
1574
1575 if (!SET_MEMBER((*diffp)->dom, b->id))
1576 return;
1577
1578 if ((*diffp)->val[A_ATOM] != val)
1579 break;
1580
1581 diffp = &JT(*diffp);
1582 at_top = 0;
1583 }
1584 samep = &JT(*diffp);
1585 while (1) {
1586 if (*samep == 0)
1587 return;
1588
1589 if (JF(*samep) != JF(b))
1590 return;
1591
1592 if (!SET_MEMBER((*samep)->dom, b->id))
1593 return;
1594
1595 if ((*samep)->val[A_ATOM] == val)
1596 break;
1597
1598 /* XXX Need to check that there are no data dependencies
1599 between diffp and samep. Currently, the code generator
1600 will not produce such dependencies. */
1601 samep = &JT(*samep);
1602 }
1603#ifdef notdef
1604 /* XXX This doesn't cover everything. */
1605 for (i = 0; i < N_ATOMS; ++i)
1606 if ((*samep)->val[i] != pred->val[i])
1607 return;
1608#endif
1609 /* Pull up the node. */
1610 pull = *samep;
1611 *samep = JT(pull);
1612 JT(pull) = *diffp;
1613
1614 /*
1615 * At the top of the chain, each predecessor needs to point at the
1616 * pulled up node. Inside the chain, there is only one predecessor
1617 * to worry about.
1618 */
1619 if (at_top) {
1620 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1621 if (JT(ep->pred) == b)
1622 JT(ep->pred) = pull;
1623 else
1624 JF(ep->pred) = pull;
1625 }
1626 }
1627 else
1628 *diffp = pull;
1629
1630 done = 0;
1631}
1632
1633static void
1634opt_blks(root, do_stmts)
1635 struct block *root;
1636 int do_stmts;
1637{
1638 int i, maxlevel;
1639 struct block *p;
1640
1641 init_val();
1642 maxlevel = root->level;
1643
1644 find_inedges(root);
1645 for (i = maxlevel; i >= 0; --i)
1646 for (p = levels[i]; p; p = p->link)
1647 opt_blk(p, do_stmts);
1648
1649 if (do_stmts)
1650 /*
1651 * No point trying to move branches; it can't possibly
1652 * make a difference at this point.
1653 */
1654 return;
1655
1656 for (i = 1; i <= maxlevel; ++i) {
1657 for (p = levels[i]; p; p = p->link) {
1658 opt_j(&p->et);
1659 opt_j(&p->ef);
1660 }
1661 }
1662
1663 find_inedges(root);
1664 for (i = 1; i <= maxlevel; ++i) {
1665 for (p = levels[i]; p; p = p->link) {
1666 or_pullup(p);
1667 and_pullup(p);
1668 }
1669 }
1670}
1671
1672static inline void
1673link_inedge(parent, child)
1674 struct edge *parent;
1675 struct block *child;
1676{
1677 parent->next = child->in_edges;
1678 child->in_edges = parent;
1679}
1680
1681static void
1682find_inedges(root)
1683 struct block *root;
1684{
1685 int i;
1686 struct block *b;
1687
1688 for (i = 0; i < n_blocks; ++i)
1689 blocks[i]->in_edges = 0;
1690
1691 /*
1692 * Traverse the graph, adding each edge to the predecessor
1693 * list of its successors. Skip the leaves (i.e. level 0).
1694 */
1695 for (i = root->level; i > 0; --i) {
1696 for (b = levels[i]; b != 0; b = b->link) {
1697 link_inedge(&b->et, JT(b));
1698 link_inedge(&b->ef, JF(b));
1699 }
1700 }
1701}
1702
1703static void
1704opt_root(b)
1705 struct block **b;
1706{
1707 struct slist *tmp, *s;
1708
1709 s = (*b)->stmts;
1710 (*b)->stmts = 0;
1711 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1712 *b = JT(*b);
1713
1714 tmp = (*b)->stmts;
1715 if (tmp != 0)
1716 sappend(s, tmp);
1717 (*b)->stmts = s;
1718
1719 /*
1720 * If the root node is a return, then there is no
1721 * point executing any statements (since the bpf machine
1722 * has no side effects).
1723 */
1724 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1725 (*b)->stmts = 0;
1726}
1727
1728static void
1729opt_loop(root, do_stmts)
1730 struct block *root;
1731 int do_stmts;
1732{
1733
1734#ifdef BDEBUG
1735 if (dflag > 1) {
1736 printf("opt_loop(root, %d) begin\n", do_stmts);
1737 opt_dump(root);
1738 }
1739#endif
1740 do {
1741 done = 1;
1742 find_levels(root);
1743 find_dom(root);
1744 find_closure(root);
1745 find_ud(root);
1746 find_edom(root);
1747 opt_blks(root, do_stmts);
1748#ifdef BDEBUG
1749 if (dflag > 1) {
1750 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1751 opt_dump(root);
1752 }
1753#endif
1754 } while (!done);
1755}
1756
1757/*
1758 * Optimize the filter code in its dag representation.
1759 */
1760void
1761bpf_optimize(rootp)
1762 struct block **rootp;
1763{
1764 struct block *root;
1765
1766 root = *rootp;
1767
1768 opt_init(root);
1769 opt_loop(root, 0);
1770 opt_loop(root, 1);
1771 intern_blocks(root);
1772#ifdef BDEBUG
1773 if (dflag > 1) {
1774 printf("after intern_blocks()\n");
1775 opt_dump(root);
1776 }
1777#endif
1778 opt_root(rootp);
1779#ifdef BDEBUG
1780 if (dflag > 1) {
1781 printf("after opt_root()\n");
1782 opt_dump(root);
1783 }
1784#endif
1785 opt_cleanup();
1786}
1787
1788static void
1789make_marks(p)
1790 struct block *p;
1791{
1792 if (!isMarked(p)) {
1793 Mark(p);
1794 if (BPF_CLASS(p->s.code) != BPF_RET) {
1795 make_marks(JT(p));
1796 make_marks(JF(p));
1797 }
1798 }
1799}
1800
1801/*
1802 * Mark code array such that isMarked(i) is true
1803 * only for nodes that are alive.
1804 */
1805static void
1806mark_code(p)
1807 struct block *p;
1808{
1809 cur_mark += 1;
1810 make_marks(p);
1811}
1812
1813/*
1814 * True iff the two stmt lists load the same value from the packet into
1815 * the accumulator.
1816 */
1817static int
1818eq_slist(x, y)
1819 struct slist *x, *y;
1820{
1821 while (1) {
1822 while (x && x->s.code == NOP)
1823 x = x->next;
1824 while (y && y->s.code == NOP)
1825 y = y->next;
1826 if (x == 0)
1827 return y == 0;
1828 if (y == 0)
1829 return x == 0;
1830 if (x->s.code != y->s.code || x->s.k != y->s.k)
1831 return 0;
1832 x = x->next;
1833 y = y->next;
1834 }
1835}
1836
1837static inline int
1838eq_blk(b0, b1)
1839 struct block *b0, *b1;
1840{
1841 if (b0->s.code == b1->s.code &&
1842 b0->s.k == b1->s.k &&
1843 b0->et.succ == b1->et.succ &&
1844 b0->ef.succ == b1->ef.succ)
1845 return eq_slist(b0->stmts, b1->stmts);
1846 return 0;
1847}
1848
1849static void
1850intern_blocks(root)
1851 struct block *root;
1852{
1853 struct block *p;
1854 int i, j;
1855 int done1; /* don't shadow global */
1856 top:
1857 done1 = 1;
1858 for (i = 0; i < n_blocks; ++i)
1859 blocks[i]->link = 0;
1860
1861 mark_code(root);
1862
1863 for (i = n_blocks - 1; --i >= 0; ) {
1864 if (!isMarked(blocks[i]))
1865 continue;
1866 for (j = i + 1; j < n_blocks; ++j) {
1867 if (!isMarked(blocks[j]))
1868 continue;
1869 if (eq_blk(blocks[i], blocks[j])) {
1870 blocks[i]->link = blocks[j]->link ?
1871 blocks[j]->link : blocks[j];
1872 break;
1873 }
1874 }
1875 }
1876 for (i = 0; i < n_blocks; ++i) {
1877 p = blocks[i];
1878 if (JT(p) == 0)
1879 continue;
1880 if (JT(p)->link) {
1881 done1 = 0;
1882 JT(p) = JT(p)->link;
1883 }
1884 if (JF(p)->link) {
1885 done1 = 0;
1886 JF(p) = JF(p)->link;
1887 }
1888 }
1889 if (!done1)
1890 goto top;
1891}
1892
1893static void
1894opt_cleanup()
1895{
1896 free((void *)vnode_base);
1897 free((void *)vmap);
1898 free((void *)edges);
1899 free((void *)space);
1900 free((void *)levels);
1901 free((void *)blocks);
1902}
1903
1904/*
1905 * Return the number of stmts in 's'.
1906 */
1907static u_int
1908slength(s)
1909 struct slist *s;
1910{
1911 u_int n = 0;
1912
1913 for (; s; s = s->next)
1914 if (s->s.code != NOP)
1915 ++n;
1916 return n;
1917}
1918
1919/*
1920 * Return the number of nodes reachable by 'p'.
1921 * All nodes should be initially unmarked.
1922 */
1923static int
1924count_blocks(p)
1925 struct block *p;
1926{
1927 if (p == 0 || isMarked(p))
1928 return 0;
1929 Mark(p);
1930 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1931}
1932
1933/*
1934 * Do a depth first search on the flow graph, numbering the
1935 * basic blocks, and entering them into the 'blocks' array.
1936 */
1937static void
1938number_blks_r(p)
1939 struct block *p;
1940{
1941 int n;
1942
1943 if (p == 0 || isMarked(p))
1944 return;
1945
1946 Mark(p);
1947 n = n_blocks++;
1948 p->id = n;
1949 blocks[n] = p;
1950
1951 number_blks_r(JT(p));
1952 number_blks_r(JF(p));
1953}
1954
1955/*
1956 * Return the number of stmts in the flowgraph reachable by 'p'.
1957 * The nodes should be unmarked before calling.
1958 *
1959 * Note that "stmts" means "instructions", and that this includes
1960 *
1961 * side-effect statements in 'p' (slength(p->stmts));
1962 *
1963 * statements in the true branch from 'p' (count_stmts(JT(p)));
1964 *
1965 * statements in the false branch from 'p' (count_stmts(JF(p)));
1966 *
1967 * the conditional jump itself (1);
1968 *
1969 * an extra long jump if the true branch requires it (p->longjt);
1970 *
1971 * an extra long jump if the false branch requires it (p->longjf).
1972 */
1973static u_int
1974count_stmts(p)
1975 struct block *p;
1976{
1977 u_int n;
1978
1979 if (p == 0 || isMarked(p))
1980 return 0;
1981 Mark(p);
1982 n = count_stmts(JT(p)) + count_stmts(JF(p));
1983 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1984}
1985
1986/*
1987 * Allocate memory. All allocation is done before optimization
1988 * is begun. A linear bound on the size of all data structures is computed
1989 * from the total number of blocks and/or statements.
1990 */
1991static void
1992opt_init(root)
1993 struct block *root;
1994{
1995 bpf_u_int32 *p;
1996 int i, n, max_stmts;
1997
1998 /*
1999 * First, count the blocks, so we can malloc an array to map
2000 * block number to block. Then, put the blocks into the array.
2001 */
2002 unMarkAll();
2003 n = count_blocks(root);
2004 blocks = (struct block **)calloc(n, sizeof(*blocks));
2005 if (blocks == NULL)
2006 bpf_error("malloc");
2007 unMarkAll();
2008 n_blocks = 0;
2009 number_blks_r(root);
2010
2011 n_edges = 2 * n_blocks;
2012 edges = (struct edge **)calloc(n_edges, sizeof(*edges));
2013 if (edges == NULL)
2014 bpf_error("malloc");
2015
2016 /*
2017 * The number of levels is bounded by the number of nodes.
2018 */
2019 levels = (struct block **)calloc(n_blocks, sizeof(*levels));
2020 if (levels == NULL)
2021 bpf_error("malloc");
2022
2023 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2024 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2025
2026 /* XXX */
2027 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
2028 + n_edges * edgewords * sizeof(*space));
2029 if (space == NULL)
2030 bpf_error("malloc");
2031 p = space;
2032 all_dom_sets = p;
2033 for (i = 0; i < n; ++i) {
2034 blocks[i]->dom = p;
2035 p += nodewords;
2036 }
2037 all_closure_sets = p;
2038 for (i = 0; i < n; ++i) {
2039 blocks[i]->closure = p;
2040 p += nodewords;
2041 }
2042 all_edge_sets = p;
2043 for (i = 0; i < n; ++i) {
2044 register struct block *b = blocks[i];
2045
2046 b->et.edom = p;
2047 p += edgewords;
2048 b->ef.edom = p;
2049 p += edgewords;
2050 b->et.id = i;
2051 edges[i] = &b->et;
2052 b->ef.id = n_blocks + i;
2053 edges[n_blocks + i] = &b->ef;
2054 b->et.pred = b;
2055 b->ef.pred = b;
2056 }
2057 max_stmts = 0;
2058 for (i = 0; i < n; ++i)
2059 max_stmts += slength(blocks[i]->stmts) + 1;
2060 /*
2061 * We allocate at most 3 value numbers per statement,
2062 * so this is an upper bound on the number of valnodes
2063 * we'll need.
2064 */
2065 maxval = 3 * max_stmts;
2066 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
2067 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
2068 if (vmap == NULL || vnode_base == NULL)
2069 bpf_error("malloc");
2070}
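
/*
 * Sizing example (illustrative): with n_blocks == 10 and therefore
 * n_edges == 20, 32-bit set words give nodewords == 10/32 + 1 == 1
 * and edgewords == 20/32 + 1 == 1, so 'space' holds 2*10*1 words for
 * the dom and closure sets plus 20*1 words for the edge sets,
 * parceled out by the three loops above.
 */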
2071
2072/*
2073 * Some pointers used to convert the basic block form of the code,
2074 * into the array form that BPF requires. 'fstart' will point to
2075 * the malloc'd array while 'ftail' is used during the recursive traversal.
2076 */
2077static struct bpf_insn *fstart;
2078static struct bpf_insn *ftail;
2079
2080#ifdef BDEBUG
2081int bids[1000];
2082#endif
2083
2084/*
2085 * Returns true if successful. Returns false if a branch has
2086 * an offset that is too large. If so, we have marked that
2087 * branch so that on a subsequent iteration, it will be treated
2088 * properly.
2089 */
2090static int
2091convert_code_r(p)
2092 struct block *p;
2093{
2094 struct bpf_insn *dst;
2095 struct slist *src;
2096 int slen;
2097 u_int off;
2098 int extrajmps; /* number of extra jumps inserted */
2099 struct slist **offset = NULL;
2100
2101 if (p == 0 || isMarked(p))
2102 return (1);
2103 Mark(p);
2104
2105 if (convert_code_r(JF(p)) == 0)
2106 return (0);
2107 if (convert_code_r(JT(p)) == 0)
2108 return (0);
2109
2110 slen = slength(p->stmts);
2111 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2112 /* inflate length by any extra jumps */
2113
2114 p->offset = dst - fstart;
2115
2116 /* generate offset[] for convenience */
2117 if (slen) {
2118 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2119 if (!offset) {
2120 bpf_error("not enough core");
2121 /*NOTREACHED*/
2122 }
2123 }
2124 src = p->stmts;
2125 for (off = 0; off < slen && src; off++) {
2126#if 0
2127 printf("off=%d src=%x\n", off, src);
2128#endif
2129 offset[off] = src;
2130 src = src->next;
2131 }
2132
2133 off = 0;
2134 for (src = p->stmts; src; src = src->next) {
2135 if (src->s.code == NOP)
2136 continue;
2137 dst->code = (u_short)src->s.code;
2138 dst->k = src->s.k;
2139
2140 /* fill block-local relative jump */
2141 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2142#if 0
2143 if (src->s.jt || src->s.jf) {
2144 bpf_error("illegal jmp destination");
2145 /*NOTREACHED*/
2146 }
2147#endif
2148 goto filled;
2149 }
2150 if (off == slen - 2) /*???*/
2151 goto filled;
2152
2153 {
2154 int i;
2155 int jt, jf;
2156 const char *ljerr = "%s for block-local relative jump: off=%d";
2157
2158#if 0
2159 printf("code=%x off=%d %x %x\n", src->s.code,
2160 off, src->s.jt, src->s.jf);
2161#endif
2162
2163 if (!src->s.jt || !src->s.jf) {
2164 bpf_error(ljerr, "no jmp destination", off);
2165 /*NOTREACHED*/
2166 }
2167
2168 jt = jf = 0;
2169 for (i = 0; i < slen; i++) {
2170 if (offset[i] == src->s.jt) {
2171 if (jt) {
2172 bpf_error(ljerr, "multiple matches", off);
2173 /*NOTREACHED*/
2174 }
2175
2176 dst->jt = i - off - 1;
2177 jt++;
2178 }
2179 if (offset[i] == src->s.jf) {
2180 if (jf) {
2181 bpf_error(ljerr, "multiple matches", off);
2182 /*NOTREACHED*/
2183 }
2184 dst->jf = i - off - 1;
2185 jf++;
2186 }
2187 }
2188 if (!jt || !jf) {
2189 bpf_error(ljerr, "no destination found", off);
2190 /*NOTREACHED*/
2191 }
2192 }
2193filled:
2194 ++dst;
2195 ++off;
2196 }
2197 if (offset)
2198 free(offset);
2199
2200#ifdef BDEBUG
2201 bids[dst - fstart] = p->id + 1;
2202#endif
2203 dst->code = (u_short)p->s.code;
2204 dst->k = p->s.k;
2205 if (JT(p)) {
2206 extrajmps = 0;
2207 off = JT(p)->offset - (p->offset + slen) - 1;
2208 if (off >= 256) {
2209 /* offset too large for branch, must add a jump */
2210 if (p->longjt == 0) {
2211 /* mark this instruction and retry */
2212 p->longjt++;
2213 return(0);
2214 }
2215 /* branch if T to following jump */
2216 dst->jt = extrajmps;
2217 extrajmps++;
2218 dst[extrajmps].code = BPF_JMP|BPF_JA;
2219 dst[extrajmps].k = off - extrajmps;
2220 }
2221 else
2222 dst->jt = off;
2223 off = JF(p)->offset - (p->offset + slen) - 1;
2224 if (off >= 256) {
2225 /* offset too large for branch, must add a jump */
2226 if (p->longjf == 0) {
2227 /* mark this instruction and retry */
2228 p->longjf++;
2229 return(0);
2230 }
2231 /* branch if F to following jump */
2232 /* if two jumps are inserted, F goes to second one */
2233 dst->jf = extrajmps;
2234 extrajmps++;
2235 dst[extrajmps].code = BPF_JMP|BPF_JA;
2236 dst[extrajmps].k = off - extrajmps;
2237 }
2238 else
2239 dst->jf = off;
2240 }
2241 return (1);
2242}
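
/*
 * Example of the long-jump fixup (illustrative): BPF conditional
 * branch offsets are 8 bits wide, so if JT(p) lies more than 255
 * instructions ahead the first conversion pass fails, p->longjt is
 * set, and the retry emits
 *
 *	jeq ..., jt = 0		true branch targets the next insn
 *	ja  off - 1		which long-jumps to the real target
 *
 * routing the too-distant true branch through the "ja", whose k field
 * is 32 bits wide.
 */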
2243
2244
2245/*
2246 * Convert flowgraph intermediate representation to the
2247 * BPF array representation. Set *lenp to the number of instructions.
2248 *
2249 * This routine does *NOT* leak the memory pointed to by fp. It *must
2250 * not* do free(fp) before returning fp; doing so would make no sense,
2251 * as the BPF array pointed to by the return value of icode_to_fcode()
2252 * must be valid - it's being returned for use in a bpf_program structure.
2253 *
2254 * If it appears that icode_to_fcode() is leaking, the problem is that
2255 * the program using pcap_compile() is failing to free the memory in
2256 * the BPF program when it's done - the leak is in the program, not in
2257 * the routine that happens to be allocating the memory. (By analogy, if
2258 * a program calls fopen() without ever calling fclose() on the FILE *,
2259 * it will leak the FILE structure; the leak is not in fopen(), it's in
2260 * the program.) Change the program to use pcap_freecode() when it's
2261 * done with the filter program. See the pcap man page.
2262 */
2263struct bpf_insn *
2264icode_to_fcode(root, lenp)
2265 struct block *root;
2266 u_int *lenp;
2267{
2268 u_int n;
2269 struct bpf_insn *fp;
2270
2271 /*
2272 * Loop doing convert_code_r() until no branches remain
2273 * with too-large offsets.
2274 */
2275 while (1) {
2276 unMarkAll();
2277 n = *lenp = count_stmts(root);
2278
2279 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2280 if (fp == NULL)
2281 bpf_error("malloc");
2282 memset((char *)fp, 0, sizeof(*fp) * n);
2283 fstart = fp;
2284 ftail = fp + n;
2285
2286 unMarkAll();
2287 if (convert_code_r(root))
2288 break;
2289 free(fp);
2290 }
2291
2292 return fp;
2293}
2294
2295/*
2296 * Make a copy of a BPF program and put it in the "fcode" member of
2297 * a "pcap_t".
2298 *
2299 * If we fail to allocate memory for the copy, fill in the "errbuf"
2300 * member of the "pcap_t" with an error message, and return -1;
2301 * otherwise, return 0.
2302 */
2303int
2304install_bpf_program(pcap_t *p, struct bpf_program *fp)
2305{
2306 size_t prog_size;
2307
2308 /*
2309 * Validate the program.
2310 */
2311 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2312 snprintf(p->errbuf, sizeof(p->errbuf),
2313 "BPF program is not valid");
2314 return (-1);
2315 }
2316
2317 /*
2318 * Free up any already installed program.
2319 */
2320 pcap_freecode(&p->fcode);
2321
2322 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2323 p->fcode.bf_len = fp->bf_len;
2324 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2325 if (p->fcode.bf_insns == NULL) {
2326 snprintf(p->errbuf, sizeof(p->errbuf),
2327 "malloc: %s", pcap_strerror(errno));
2328 return (-1);
2329 }
2330 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2331 return (0);
2332}
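
/*
 * Typical use (illustrative): a pcap_setfilter implementation can
 * validate and copy the compiled program in one step, e.g.
 *
 *	if (install_bpf_program(p, fp) < 0)
 *		return (-1);
 *
 * after which p->fcode owns an independent copy that pcap_freecode()
 * releases.
 */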
2333
2334#ifdef BDEBUG
2335static void
2336opt_dump(root)
2337 struct block *root;
2338{
2339 struct bpf_program f;
2340
2341 memset(bids, 0, sizeof bids);
2342 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2343 bpf_dump(&f, 1);
2344 putchar('\n');
2345 free((char *)f.bf_insns);
2346}
2347#endif